[networking] integrate watchwebsite module to networking + doc and reworking

This commit is contained in:
nemunaire 2014-12-04 21:51:28 +01:00
commit 5dcf0d6961
3 changed files with 121 additions and 72 deletions

View file

@ -11,17 +11,20 @@ from more import Response
from . import isup from . import isup
from . import page from . import page
from . import w3c from . import w3c
from . import watchWebsite
from . import whois from . import whois
def load(context): def load(context):
for mod in [isup, page, w3c, whois]: for mod in [isup, page, w3c, watchWebsite, whois]:
mod.IRCException = IRCException mod.IRCException = IRCException
mod.ModuleEvent = ModuleEvent mod.ModuleEvent = ModuleEvent
mod.add_event = add_event mod.add_event = add_event
mod.save = save mod.save = save
mod.print = print mod.print = print
mod.print_debug = print_debug mod.print_debug = print_debug
mod.send_response = send_response
page.load(CONF, add_hook) page.load(CONF, add_hook)
watchWebsite.load(DATAS)
whois.load(CONF, add_hook) whois.load(CONF, add_hook)
@ -107,3 +110,21 @@ def cmd_w3c(msg):
res.append_message("%s%s on line %s, col %s: %s" % (m["type"][0].upper(), m["type"][1:], m["lastLine"], m["lastColumn"], m["message"])) res.append_message("%s%s on line %s, col %s: %s" % (m["type"][0].upper(), m["type"][1:], m["lastLine"], m["lastColumn"], m["message"]))
return res return res
@hook("cmd_hook", "watch", data="diff")
@hook("cmd_hook", "updown", data="updown")
def cmd_watch(msg, diffType="diff"):
    """Handle the `watch` and `updown` commands: start watching a URL

    Argument:
    msg -- the received command; msg.cmds[1] is the URL to watch

    Keyword argument:
    diffType -- kind of watch requested ("diff" or "updown"), presumably
                filled from the hook's data value -- TODO confirm.
                NOTE(review): it is not forwarded to add_site here.

    Raises IRCException when no URL follows the command.
    """

    # msg.cmds[0] is the command itself, so a URL needs a second element
    if len(msg.cmds) > 1:
        return watchWebsite.add_site(msg.cmds[1])

    raise IRCException("indicate an URL to watch!")
@hook("cmd_hook", "unwatch")
def cmd_unwatch(msg):
    """Handle the `unwatch` command: stop watching a URL

    Argument:
    msg -- the received command; msg.cmds[1] is the URL to stop watching

    Raises IRCException when no URL follows the command.
    """

    if len(msg.cmds) <= 1:
        raise IRCException("which URL should I stop watching?")

    # Unwatching must remove the site; calling add_site here would
    # register the URL again instead of dropping it.
    return watchWebsite.del_site(msg.cmds[1])

View file

@ -6,8 +6,10 @@ from xml.dom.minidom import parse
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
from xml.dom.minidom import getDOMImplementation from xml.dom.minidom import getDOMImplementation
class AtomEntry: class AtomEntry:
def __init__ (self, node):
def __init__(self, node):
self.id = node.getElementsByTagName("id")[0].firstChild.nodeValue self.id = node.getElementsByTagName("id")[0].firstChild.nodeValue
if node.getElementsByTagName("title")[0].firstChild is not None: if node.getElementsByTagName("title")[0].firstChild is not None:
self.title = node.getElementsByTagName("title")[0].firstChild.nodeValue self.title = node.getElementsByTagName("title")[0].firstChild.nodeValue
@ -19,36 +21,38 @@ class AtomEntry:
try: try:
self.updated = time.strptime(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10], "%Y-%m-%d") self.updated = time.strptime(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10], "%Y-%m-%d")
except: except:
print (node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10]) print(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10])
self.updated = time.localtime () self.updated = time.localtime()
if len(node.getElementsByTagName("summary")) > 0 and node.getElementsByTagName("summary")[0].firstChild is not None: if len(node.getElementsByTagName("summary")) > 0 and node.getElementsByTagName("summary")[0].firstChild is not None:
self.summary = node.getElementsByTagName("summary")[0].firstChild.nodeValue self.summary = node.getElementsByTagName("summary")[0].firstChild.nodeValue
else: else:
self.summary = None self.summary = None
if len(node.getElementsByTagName("link")) > 0: if len(node.getElementsByTagName("link")) > 0:
self.link = node.getElementsByTagName("link")[0].getAttribute ("href") self.link = node.getElementsByTagName("link")[0].getAttribute("href")
else: else:
self.link = None self.link = None
if len (node.getElementsByTagName("category")) >= 1: if len(node.getElementsByTagName("category")) >= 1:
self.category = node.getElementsByTagName("category")[0].getAttribute ("term") self.category = node.getElementsByTagName("category")[0].getAttribute("term")
else: else:
self.category = None self.category = None
if len (node.getElementsByTagName("link")) > 1: if len(node.getElementsByTagName("link")) > 1:
self.link2 = node.getElementsByTagName("link")[1].getAttribute ("href") self.link2 = node.getElementsByTagName("link")[1].getAttribute("href")
else: else:
self.link2 = None self.link2 = None
class Atom: class Atom:
def __init__ (self, string):
def __init__(self, string):
self.raw = string self.raw = string
self.feed = parseString (string).documentElement self.feed = parseString(string).documentElement
self.id = self.feed.getElementsByTagName("id")[0].firstChild.nodeValue self.id = self.feed.getElementsByTagName("id")[0].firstChild.nodeValue
self.title = self.feed.getElementsByTagName("title")[0].firstChild.nodeValue self.title = self.feed.getElementsByTagName("title")[0].firstChild.nodeValue
self.updated = None self.updated = None
self.entries = dict () self.entries = dict()
for item in self.feed.getElementsByTagName("entry"): for item in self.feed.getElementsByTagName("entry"):
entry = AtomEntry (item) entry = AtomEntry(item)
self.entries[entry.id] = entry self.entries[entry.id] = entry
if self.updated is None or self.updated < entry.updated: if self.updated is None or self.updated < entry.updated:
self.updated = entry.updated self.updated = entry.updated
@ -56,13 +60,13 @@ class Atom:
def __str__(self): def __str__(self):
return self.raw return self.raw
def diff (self, other): def diff(self, other):
differ = list () differ = list()
for k in other.entries.keys (): for k in other.entries.keys():
if self.updated is None and k not in self.entries: if self.updated is None and k not in self.entries:
self.updated = other.entries[k].updated self.updated = other.entries[k].updated
if k not in self.entries and other.entries[k].updated >= self.updated: if k not in self.entries and other.entries[k].updated >= self.updated:
differ.append (other.entries[k]) differ.append(other.entries[k])
return differ return differ
@ -75,10 +79,10 @@ if __name__ == "__main__":
with open("rss.php", "r") as f: with open("rss.php", "r") as f:
for line in f: for line in f:
content2 += line content2 += line
a = Atom (content1) a = Atom(content1)
print (a.updated) print(a.updated)
b = Atom (content2) b = Atom(content2)
print (b.updated) print(b.updated)
diff = a.diff (b) diff = a.diff(b)
print (diff) print(diff)

View file

@ -1,31 +1,20 @@
# coding=utf-8
"""Alert on changes on websites""" """Alert on changes on websites"""
from datetime import datetime
from datetime import timedelta
import http.client
import hashlib
from random import randint from random import randint
import re
import socket
import sys
import urllib.parse import urllib.parse
from urllib.parse import urlparse from urllib.parse import urlparse
from hooks import hook from hooks import hook
from more import Response
nemubotversion = 3.4 nemubotversion = 3.4
from networking import page
from .atom import Atom from .atom import Atom
from more import Response from . import page
def help_full():
return "This module is autonomous you can't interract with it."
def load(context): def load(DATAS):
"""Register watched website""" """Register events on watched website"""
DATAS.setIndex("url", "watch") DATAS.setIndex("url", "watch")
for site in DATAS.getNodes("watch"): for site in DATAS.getNodes("watch"):
if site.hasNode("alert"): if site.hasNode("alert"):
@ -34,23 +23,13 @@ def load(context):
print("No alert defined for this site: " + site["url"]) print("No alert defined for this site: " + site["url"])
#DATAS.delChild(site) #DATAS.delChild(site)
def start_watching(site, offset=0):
o = urlparse(site["url"], "http")
print_debug("Add event for site: %s" % o.netloc)
evt = ModuleEvent(func=lambda url: page.render(url, None),
cmp_data=site["lastcontent"],
func_data=site["url"], offset=offset,
interval=site.getInt("time"),
call=alert_change, call_data=site)
site["_evt_id"] = add_event(evt)
def del_site(url):
"""Remove a site from watching list
@hook("cmd_hook", "unwatch") Argument:
def del_site(msg): url -- URL to unwatch
if len(msg.cmds) <= 1: """
raise IRCException("quel site dois-je arrêter de surveiller ?")
url = msg.cmds[1]
o = urlparse(url, "http") o = urlparse(url, "http")
if o.scheme != "" and url in DATAS.index: if o.scheme != "" and url in DATAS.index:
@ -58,35 +37,33 @@ def del_site(msg):
for a in site.getNodes("alert"): for a in site.getNodes("alert"):
if a["channel"] == msg.channel: if a["channel"] == msg.channel:
if not (msg.frm == a["nick"] or msg.frm_owner): if not (msg.frm == a["nick"] or msg.frm_owner):
raise IRCException("vous ne pouvez pas supprimer cette URL.") raise IRCException("you cannot unwatch this URL.")
site.delChild(a) site.delChild(a)
if not site.hasNode("alert"): if not site.hasNode("alert"):
del_event(site["_evt_id"]) del_event(site["_evt_id"])
DATAS.delChild(site) DATAS.delChild(site)
save() save()
return Response("je ne surveille désormais plus cette URL.", return Response("I don't watch this URL anymore.",
channel=msg.channel, nick=msg.nick) channel=msg.channel, nick=msg.nick)
raise IRCException("je ne surveillais pas cette URL !") raise IRCException("I didn't watch this URL!")
@hook("cmd_hook", "watch", data="diff") def add_site(url):
@hook("cmd_hook", "updown", data="updown") """Add a site to watching list
def add_site(msg, diffType="diff"):
print (diffType)
if len(msg.cmds) <= 1:
raise IRCException("quel site dois-je surveiller ?")
url = msg.cmds[1] Argument:
url -- URL to watch
"""
o = urlparse(url, "http") o = urlparse(url, "http")
if o.netloc == "": if o.netloc == "":
raise IRCException("je ne peux pas surveiller cette URL") raise IRCException("sorry, I can't watch this URL :(")
alert = ModuleState("alert") alert = ModuleState("alert")
alert["nick"] = msg.nick alert["nick"] = msg.nick
alert["server"] = msg.server alert["server"] = msg.server
alert["channel"] = msg.channel alert["channel"] = msg.channel
alert["message"] = "{url} a changé !" alert["message"] = "{url} just changed!"
if url not in DATAS.index: if url not in DATAS.index:
watch = ModuleState("watch") watch = ModuleState("watch")
@ -101,15 +78,41 @@ def add_site(msg, diffType="diff"):
save() save()
return Response(channel=msg.channel, nick=msg.nick, return Response(channel=msg.channel, nick=msg.nick,
message="ce site est maintenant sous ma surveillance.") message="this site is now under my supervision.")
def format_response(site, link='%s', title='%s', categ='%s', content='%s'): def format_response(site, link='%s', title='%s', categ='%s', content='%s'):
"""Format and send response for given site
Argument:
site -- DATAS structure representing a site to watch
Keyword arguments:
link -- link to the content
title -- for ATOM feed: title of the new article
categ -- for ATOM feed: category of the new article
content -- content of the page/new article
"""
for a in site.getNodes("alert"): for a in site.getNodes("alert"):
send_response(a["server"], Response(a["message"].format(url=site["url"], link=link, title=title, categ=categ, content=content), send_response(a["server"],
channel=a["channel"], server=a["server"])) Response(a["message"].format(url=site["url"],
link=link,
title=title,
categ=categ,
content=content),
channel=a["channel"],
server=a["server"]))
def alert_change(content, site): def alert_change(content, site):
"""Alert when a change is detected""" """Function called when a change is detected on a given site
Arguments:
content -- The new content
site -- DATAS structure representing a site to watch
"""
if site["type"] == "updown": if site["type"] == "updown":
if site["lastcontent"] is None: if site["lastcontent"] is None:
site["lastcontent"] = content is not None site["lastcontent"] = content is not None
@ -132,7 +135,7 @@ def alert_change(content, site):
try: try:
page = Atom(content) page = Atom(content)
except: except:
print ("An error occurs during Atom parsing. Restart event...") print("An error occurs during Atom parsing. Restart event...")
start_watching(site) start_watching(site)
return return
diff = site["_lastpage"].diff(page) diff = site["_lastpage"].diff(page)
@ -152,10 +155,31 @@ def alert_change(content, site):
format_response(site, link=d.link, title=urllib.parse.unquote(d.title)) format_response(site, link=d.link, title=urllib.parse.unquote(d.title))
else: else:
start_watching(site) start_watching(site)
return #Stop here, no changes, so don't save return # Stop here, no changes, so don't save
else: # Just looking for any changes else: # Just looking for any changes
format_response(site, link=site["url"], content=content) format_response(site, link=site["url"], content=content)
site["lastcontent"] = content site["lastcontent"] = content
start_watching(site) start_watching(site)
save() save()
def start_watching(site, offset=0):
    """Schedule the event that periodically checks a watched site

    Argument:
    site -- DATAS structure representing a site to watch

    Keyword argument:
    offset -- offset time to delay the launch of the first check
    """

    def fetch(url):
        # Retrieve the current content of the page to compare
        return page.render(url, None)

    host = urlparse(site["url"], "http").netloc
    print_debug("Add %s event for site: %s" % (site["type"], host))

    # alert_change is called with the site node as call_data
    event = ModuleEvent(
        func=fetch,
        cmp_data=site["lastcontent"],
        func_data=site["url"],
        offset=offset,
        interval=site.getInt("time"),
        call=alert_change,
        call_data=site,
    )

    # Keep the event id on the site node so the event can be removed later
    site["_evt_id"] = add_event(event)