[networking] integrate the watchWebsite module into networking; add documentation and rework the code

This commit is contained in:
nemunaire 2014-12-04 21:51:28 +01:00
parent c75d85b88b
commit 5dcf0d6961
3 changed files with 121 additions and 72 deletions

View File

@ -11,17 +11,20 @@ from more import Response
from . import isup
from . import page
from . import w3c
from . import watchWebsite
from . import whois
def load(context):
for mod in [isup, page, w3c, whois]:
for mod in [isup, page, w3c, watchWebsite, whois]:
mod.IRCException = IRCException
mod.ModuleEvent = ModuleEvent
mod.add_event = add_event
mod.save = save
mod.print = print
mod.print_debug = print_debug
mod.send_response = send_response
page.load(CONF, add_hook)
watchWebsite.load(DATAS)
whois.load(CONF, add_hook)
@ -107,3 +110,21 @@ def cmd_w3c(msg):
res.append_message("%s%s on line %s, col %s: %s" % (m["type"][0].upper(), m["type"][1:], m["lastLine"], m["lastColumn"], m["message"]))
return res
@hook("cmd_hook", "watch", data="diff")
@hook("cmd_hook", "updown", data="updown")
def cmd_watch(msg, diffType="diff"):
    """Handle the `watch` and `updown` commands: add an URL to the watch list.

    Argument:
    msg -- received command; msg.cmds[1] is the URL to watch

    Keyword argument:
    diffType -- presumably receives the hook's `data` value ("diff" or
                "updown") -- TODO confirm; it is not used by this function
                and is not forwarded to watchWebsite.add_site

    Raises IRCException when no URL follows the command name.
    """
    args = msg.cmds
    if len(args) < 2:
        raise IRCException("indicate an URL to watch!")

    url = args[1]
    return watchWebsite.add_site(url)
@hook("cmd_hook", "unwatch")
def cmd_unwatch(msg):
    """Handle the `unwatch` command: remove an URL from the watch list.

    Argument:
    msg -- received command; msg.cmds[1] is the URL to stop watching

    Returns whatever watchWebsite.del_site returns for that URL.

    Raises IRCException when no URL follows the command name.
    """
    if len(msg.cmds) <= 1:
        raise IRCException("which URL should I stop watching?")

    # Delegate to del_site, not add_site: this command must remove the
    # watch, whereas add_site would register the URL a second time.
    return watchWebsite.del_site(msg.cmds[1])

View File

@ -6,8 +6,10 @@ from xml.dom.minidom import parse
from xml.dom.minidom import parseString
from xml.dom.minidom import getDOMImplementation
class AtomEntry:
def __init__ (self, node):
def __init__(self, node):
self.id = node.getElementsByTagName("id")[0].firstChild.nodeValue
if node.getElementsByTagName("title")[0].firstChild is not None:
self.title = node.getElementsByTagName("title")[0].firstChild.nodeValue
@ -19,36 +21,38 @@ class AtomEntry:
try:
self.updated = time.strptime(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10], "%Y-%m-%d")
except:
print (node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10])
self.updated = time.localtime ()
print(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10])
self.updated = time.localtime()
if len(node.getElementsByTagName("summary")) > 0 and node.getElementsByTagName("summary")[0].firstChild is not None:
self.summary = node.getElementsByTagName("summary")[0].firstChild.nodeValue
else:
self.summary = None
if len(node.getElementsByTagName("link")) > 0:
self.link = node.getElementsByTagName("link")[0].getAttribute ("href")
self.link = node.getElementsByTagName("link")[0].getAttribute("href")
else:
self.link = None
if len (node.getElementsByTagName("category")) >= 1:
self.category = node.getElementsByTagName("category")[0].getAttribute ("term")
if len(node.getElementsByTagName("category")) >= 1:
self.category = node.getElementsByTagName("category")[0].getAttribute("term")
else:
self.category = None
if len (node.getElementsByTagName("link")) > 1:
self.link2 = node.getElementsByTagName("link")[1].getAttribute ("href")
if len(node.getElementsByTagName("link")) > 1:
self.link2 = node.getElementsByTagName("link")[1].getAttribute("href")
else:
self.link2 = None
class Atom:
def __init__ (self, string):
def __init__(self, string):
self.raw = string
self.feed = parseString (string).documentElement
self.feed = parseString(string).documentElement
self.id = self.feed.getElementsByTagName("id")[0].firstChild.nodeValue
self.title = self.feed.getElementsByTagName("title")[0].firstChild.nodeValue
self.updated = None
self.entries = dict ()
self.entries = dict()
for item in self.feed.getElementsByTagName("entry"):
entry = AtomEntry (item)
entry = AtomEntry(item)
self.entries[entry.id] = entry
if self.updated is None or self.updated < entry.updated:
self.updated = entry.updated
@ -56,13 +60,13 @@ class Atom:
def __str__(self):
return self.raw
def diff (self, other):
differ = list ()
for k in other.entries.keys ():
def diff(self, other):
differ = list()
for k in other.entries.keys():
if self.updated is None and k not in self.entries:
self.updated = other.entries[k].updated
if k not in self.entries and other.entries[k].updated >= self.updated:
differ.append (other.entries[k])
differ.append(other.entries[k])
return differ
@ -75,10 +79,10 @@ if __name__ == "__main__":
with open("rss.php", "r") as f:
for line in f:
content2 += line
a = Atom (content1)
print (a.updated)
b = Atom (content2)
print (b.updated)
a = Atom(content1)
print(a.updated)
b = Atom(content2)
print(b.updated)
diff = a.diff (b)
print (diff)
diff = a.diff(b)
print(diff)

View File

@ -1,31 +1,20 @@
# coding=utf-8
"""Alert on changes on websites"""
from datetime import datetime
from datetime import timedelta
import http.client
import hashlib
from random import randint
import re
import socket
import sys
import urllib.parse
from urllib.parse import urlparse
from hooks import hook
from more import Response
nemubotversion = 3.4
from networking import page
from .atom import Atom
from more import Response
from . import page
def help_full():
    """Return the full help message of this module."""
    text = "This module is autonomous you can't interract with it."
    return text
def load(context):
"""Register watched website"""
def load(DATAS):
"""Register events on watched website"""
DATAS.setIndex("url", "watch")
for site in DATAS.getNodes("watch"):
if site.hasNode("alert"):
@ -34,23 +23,13 @@ def load(context):
print("No alert defined for this site: " + site["url"])
#DATAS.delChild(site)
def start_watching(site, offset=0):
    """Create and register the event that checks a watched site.

    Argument:
    site -- node describing the site; its "url", "lastcontent" and "time"
            entries are read, and "_evt_id" is written

    Keyword argument:
    offset -- forwarded unchanged as the event's `offset` (default 0)
    """
    target = urlparse(site["url"], "http")
    print_debug("Add event for site: %s" % (target.netloc,))

    def fetch(url):
        # Same call the event performs on every run: render the page.
        return page.render(url, None)

    event = ModuleEvent(
        func=fetch,
        cmp_data=site["lastcontent"],
        func_data=site["url"],
        offset=offset,
        interval=site.getInt("time"),
        call=alert_change,
        call_data=site,
    )

    # Keep the identifier returned by add_event on the site node.
    event_id = add_event(event)
    site["_evt_id"] = event_id
def del_site(url):
"""Remove a site from watching list
@hook("cmd_hook", "unwatch")
def del_site(msg):
if len(msg.cmds) <= 1:
raise IRCException("quel site dois-je arrêter de surveiller ?")
url = msg.cmds[1]
Argument:
url -- URL to unwatch
"""
o = urlparse(url, "http")
if o.scheme != "" and url in DATAS.index:
@ -58,35 +37,33 @@ def del_site(msg):
for a in site.getNodes("alert"):
if a["channel"] == msg.channel:
if not (msg.frm == a["nick"] or msg.frm_owner):
raise IRCException("vous ne pouvez pas supprimer cette URL.")
raise IRCException("you cannot unwatch this URL.")
site.delChild(a)
if not site.hasNode("alert"):
del_event(site["_evt_id"])
DATAS.delChild(site)
save()
return Response("je ne surveille désormais plus cette URL.",
return Response("I don't watch this URL anymore.",
channel=msg.channel, nick=msg.nick)
raise IRCException("je ne surveillais pas cette URL !")
raise IRCException("I didn't watch this URL!")
@hook("cmd_hook", "watch", data="diff")
@hook("cmd_hook", "updown", data="updown")
def add_site(msg, diffType="diff"):
print (diffType)
if len(msg.cmds) <= 1:
raise IRCException("quel site dois-je surveiller ?")
def add_site(url):
"""Add a site to watching list
url = msg.cmds[1]
Argument:
url -- URL to watch
"""
o = urlparse(url, "http")
if o.netloc == "":
raise IRCException("je ne peux pas surveiller cette URL")
raise IRCException("sorry, I can't watch this URL :(")
alert = ModuleState("alert")
alert["nick"] = msg.nick
alert["server"] = msg.server
alert["channel"] = msg.channel
alert["message"] = "{url} a changé !"
alert["message"] = "{url} just changed!"
if url not in DATAS.index:
watch = ModuleState("watch")
@ -101,15 +78,41 @@ def add_site(msg, diffType="diff"):
save()
return Response(channel=msg.channel, nick=msg.nick,
message="ce site est maintenant sous ma surveillance.")
message="this site is now under my supervision.")
def format_response(site, link='%s', title='%s', categ='%s', content='%s'):
"""Format and send response for given site
Argument:
site -- DATAS structure representing a site to watch
Keyword arguments:
link -- link to the content
title -- for ATOM feed: title of the new article
categ -- for ATOM feed: category of the new article
content -- content of the page/new article
"""
for a in site.getNodes("alert"):
send_response(a["server"], Response(a["message"].format(url=site["url"], link=link, title=title, categ=categ, content=content),
channel=a["channel"], server=a["server"]))
send_response(a["server"],
Response(a["message"].format(url=site["url"],
link=link,
title=title,
categ=categ,
content=content),
channel=a["channel"],
server=a["server"]))
def alert_change(content, site):
"""Alert when a change is detected"""
"""Function called when a change is detected on a given site
Arguments:
content -- The new content
site -- DATAS structure representing a site to watch
"""
if site["type"] == "updown":
if site["lastcontent"] is None:
site["lastcontent"] = content is not None
@ -132,7 +135,7 @@ def alert_change(content, site):
try:
page = Atom(content)
except:
print ("An error occurs during Atom parsing. Restart event...")
print("An error occurs during Atom parsing. Restart event...")
start_watching(site)
return
diff = site["_lastpage"].diff(page)
@ -152,10 +155,31 @@ def alert_change(content, site):
format_response(site, link=d.link, title=urllib.parse.unquote(d.title))
else:
start_watching(site)
return #Stop here, no changes, so don't save
return # Stop here, no changes, so don't save
else: # Just looking for any changes
else: # Just looking for any changes
format_response(site, link=site["url"], content=content)
site["lastcontent"] = content
start_watching(site)
save()
def start_watching(site, offset=0):
    """Launch the event watching the given site

    Argument:
    site -- DATAS structure representing a site to watch; its "url",
            "type", "lastcontent" and "time" entries are read and
            "_evt_id" is written with the value returned by add_event

    Keyword argument:
    offset -- offset time to delay the launch of the first check
    """
    parsed_url = urlparse(site["url"], "http")
    debug_line = "Add %s event for site: %s" % (site["type"], parsed_url.netloc)
    print_debug(debug_line)

    def render_page(url):
        # The event calls this with func_data (the site URL).
        return page.render(url, None)

    # alert_change is given to the event as `call`, with the site node as
    # its call_data.
    watcher = ModuleEvent(
        func=render_page,
        cmp_data=site["lastcontent"],
        func_data=site["url"],
        offset=offset,
        interval=site.getInt("time"),
        call=alert_change,
        call_data=site,
    )
    site["_evt_id"] = add_event(watcher)