[networking] integrate the watchWebsite module into networking, add documentation and rework it

This commit is contained in:
nemunaire 2014-12-04 21:51:28 +01:00
commit 5dcf0d6961
3 changed files with 121 additions and 72 deletions

View file

@ -11,17 +11,20 @@ from more import Response
from . import isup
from . import page
from . import w3c
from . import watchWebsite
from . import whois
def load(context):
    """Share the bot helpers with every submodule, then initialise them.

    The submodules use names such as IRCException or save as plain globals
    without importing them, so they are bound on each module object here
    before the per-module load() functions run.

    Argument:
    context -- not used by this function
    """
    # Only the current module list is kept: a second, older `for` header
    # stacked on top of this one would merely repeat the same assignments.
    for mod in [isup, page, w3c, watchWebsite, whois]:
        mod.IRCException = IRCException
        mod.ModuleEvent = ModuleEvent
        mod.add_event = add_event
        mod.save = save
        mod.print = print
        mod.print_debug = print_debug
        mod.send_response = send_response
    # NOTE(review): watchWebsite also reads `del_event` and `ModuleState` as
    # globals; they are not shared above -- confirm where they come from.

    page.load(CONF, add_hook)
    watchWebsite.load(DATAS)
    whois.load(CONF, add_hook)
@ -107,3 +110,21 @@ def cmd_w3c(msg):
res.append_message("%s%s on line %s, col %s: %s" % (m["type"][0].upper(), m["type"][1:], m["lastLine"], m["lastColumn"], m["message"]))
return res
@hook("cmd_hook", "watch", data="diff")
@hook("cmd_hook", "updown", data="updown")
def cmd_watch(msg, diffType="diff"):
    """Handle the "watch" and "updown" commands: start watching a URL.

    Arguments:
    msg -- the received command; msg.cmds[1] is the URL to watch
    diffType -- kind of watch requested

    Raises IRCException when no URL is given.
    """
    # NOTE(review): diffType is accepted but never forwarded to add_site().
    if len(msg.cmds) > 1:
        return watchWebsite.add_site(msg.cmds[1])
    raise IRCException("indicate an URL to watch!")
@hook("cmd_hook", "unwatch")
def cmd_unwatch(msg):
    """Handle the "unwatch" command: stop watching the given URL.

    Argument:
    msg -- the received command; msg.cmds[1] is the URL to unwatch

    Raises IRCException when no URL is given.
    """
    if len(msg.cmds) <= 1:
        raise IRCException("which URL should I stop watching?")
    # Removal is del_site()'s job; calling add_site() here would register the
    # URL instead of dropping it.
    return watchWebsite.del_site(msg.cmds[1])

View file

@ -0,0 +1,88 @@
#!/usr/bin/python3
# coding=utf-8
import time
from xml.dom.minidom import parse
from xml.dom.minidom import parseString
from xml.dom.minidom import getDOMImplementation
def _first_text(node, tag):
    """Return the text of the first `tag` descendant, or None if absent/empty."""
    matches = node.getElementsByTagName(tag)
    if not matches or matches[0].firstChild is None:
        return None
    return matches[0].firstChild.nodeValue


def _attribute_at(node, tag, position, attribute):
    """Return `attribute` of the position-th `tag` descendant, or None."""
    matches = node.getElementsByTagName(tag)
    if len(matches) <= position:
        return None
    return matches[position].getAttribute(attribute)


def _parse_updated(raw):
    """Turn an <updated> text into a struct_time, defaulting to local now."""
    if raw is not None:
        # Try the full timestamp first, then a bare date; anything after the
        # kept prefix (fractions, timezone) is ignored.
        for length, fmt in ((19, "%Y-%m-%dT%H:%M:%S"), (10, "%Y-%m-%d")):
            try:
                return time.strptime(raw[:length], fmt)
            except ValueError:
                continue
        # Show the value that could not be parsed before falling back
        print(raw[:10])
    return time.localtime()


class AtomEntry:
    """One <entry> of an Atom feed.

    Attributes set by the constructor:
    id -- text of the <id> element (mandatory)
    title -- text of the <title> element, "" when missing or empty
    updated -- time.struct_time parsed from <updated>; the current local
               time when the element is missing or cannot be parsed
    summary -- text of the <summary> element, or None
    link -- href of the first <link> element, or None
    link2 -- href of the second <link> element, or None
    category -- term of the first <category> element, or None
    """

    def __init__(self, node):
        """Read the fields of the given DOM element.

        Argument:
        node -- DOM element of the entry

        Raises IndexError or AttributeError when <id> is missing or empty.
        """
        # The identifier is required: a missing one still raises
        self.id = node.getElementsByTagName("id")[0].firstChild.nodeValue

        title = _first_text(node, "title")
        self.title = "" if title is None else title

        self.updated = _parse_updated(_first_text(node, "updated"))
        self.summary = _first_text(node, "summary")
        self.link = _attribute_at(node, "link", 0, "href")
        self.category = _attribute_at(node, "category", 0, "term")
        self.link2 = _attribute_at(node, "link", 1, "href")
class Atom:
    """A parsed Atom feed.

    Attributes:
    raw -- the string given to the constructor
    feed -- root DOM element of the parsed document
    id -- text of the first <id> element found in the document
    title -- text of the first <title> element found, "" when missing/empty
    updated -- most recent `updated` value among the entries, None when the
               feed has no entry
    entries -- dict mapping entry id to AtomEntry
    """

    def __init__(self, string):
        """Parse the given XML string.

        Argument:
        string -- the XML content of the feed
        """
        self.raw = string
        self.feed = parseString(string).documentElement
        self.id = self.feed.getElementsByTagName("id")[0].firstChild.nodeValue

        # Tolerate a missing or empty title, as AtomEntry does
        titles = self.feed.getElementsByTagName("title")
        if titles and titles[0].firstChild is not None:
            self.title = titles[0].firstChild.nodeValue
        else:
            self.title = ""

        self.updated = None
        self.entries = dict()
        for item in self.feed.getElementsByTagName("entry"):
            entry = AtomEntry(item)
            self.entries[entry.id] = entry
            if self.updated is None or self.updated < entry.updated:
                self.updated = entry.updated

    def __str__(self):
        """Return the raw string the feed was built from."""
        return self.raw

    def diff(self, other):
        """List entries of `other` that are new compared to this feed.

        An entry is reported when its id is unknown here and it is not
        older than this feed's `updated` value.

        Side effect: when this feed has no `updated` value yet, it takes the
        one of the first unknown entry met.

        Argument:
        other -- the Atom feed to compare with
        """
        fresh = []
        for key, entry in other.entries.items():
            if key in self.entries:
                continue
            if self.updated is None:
                self.updated = entry.updated
            if entry.updated >= self.updated:
                fresh.append(entry)
        return fresh
if __name__ == "__main__":
    # Manual check: parse two saved copies of a feed and show what is new
    # in the second one compared to the first.
    with open("rss.php.1", "r") as f:
        content1 = f.read()
    with open("rss.php", "r") as f:
        content2 = f.read()

    a = Atom(content1)
    print(a.updated)
    b = Atom(content2)
    print(b.updated)

    diff = a.diff(b)
    print(diff)

View file

@ -0,0 +1,185 @@
"""Alert on changes on websites"""
from random import randint
import urllib.parse
from urllib.parse import urlparse
from hooks import hook
from more import Response
nemubotversion = 3.4
from .atom import Atom
from . import page
def load(DATAS):
    """Register events on watched website

    Argument:
    DATAS -- structure holding the "watch" nodes, one per watched site
    """
    # del_site() and add_site() read DATAS as a module-level name, while the
    # parameter is local to this call: keep a module-level reference to it.
    globals()["DATAS"] = DATAS

    DATAS.setIndex("url", "watch")
    for site in DATAS.getNodes("watch"):
        if site.hasNode("alert"):
            # Random offset so the first checks are not all launched together
            start_watching(site, randint(-30, 30))
        else:
            # The site is kept in DATAS; it is only reported.
            # %-formatting so that a missing url cannot raise here.
            print("No alert defined for this site: %s" % site["url"])
def del_site(url):
"""Remove a site from watching list
Argument:
url -- URL to unwatch
"""
# NOTE(review): `msg` is neither a parameter nor assigned in this function,
# yet msg.channel, msg.frm, msg.frm_owner and msg.nick are read below; unless
# something binds a module-level `msg`, those reads fail. The caller's message
# probably has to be passed in -- confirm.
# NOTE(review): DATAS, del_event, IRCException and save are also read as
# module-level names -- confirm each one is provided to this module.
o = urlparse(url, "http")
# "http" is given as default scheme above, so the scheme test below looks
# always true; the effective condition is the lookup in the url index.
if o.scheme != "" and url in DATAS.index:
site = DATAS.index[url]
# Look for the alert registered for the requesting channel
for a in site.getNodes("alert"):
if a["channel"] == msg.channel:
# Only the nick stored in the alert, or an owner, may remove it
if not (msg.frm == a["nick"] or msg.frm_owner):
raise IRCException("you cannot unwatch this URL.")
site.delChild(a)
# No alert left: drop the event and the site itself
if not site.hasNode("alert"):
del_event(site["_evt_id"])
DATAS.delChild(site)
save()
return Response("I don't watch this URL anymore.",
channel=msg.channel, nick=msg.nick)
raise IRCException("I didn't watch this URL!")
def add_site(url):
"""Add a site to watching list
Argument:
url -- URL to watch
"""
# NOTE(review): `msg` and `diffType` are neither parameters nor assigned in
# this function; unless something binds them at module level, the reads below
# fail. They probably have to be passed in by the command handler -- confirm.
# NOTE(review): DATAS, ModuleState, IRCException and save are also read as
# module-level names -- confirm each one is provided to this module.
o = urlparse(url, "http")
# Reject anything urlparse finds no network location in
if o.netloc == "":
raise IRCException("sorry, I can't watch this URL :(")
# The alert records who asked and where the notification must be sent
alert = ModuleState("alert")
alert["nick"] = msg.nick
alert["server"] = msg.server
alert["channel"] = msg.channel
alert["message"] = "{url} just changed!"
if url not in DATAS.index:
# First alert for this URL: create the site node and start its event
watch = ModuleState("watch")
watch["type"] = diffType
watch["url"] = url
# Read back by start_watching() as the event interval; unit not visible
# here -- TODO confirm
watch["time"] = 123
DATAS.addChild(watch)
watch.addChild(alert)
start_watching(watch)
else:
# Already watched: only attach the new alert to the existing site
DATAS.index[url].addChild(alert)
save()
return Response(channel=msg.channel, nick=msg.nick,
message="this site is now under my supervision.")
def format_response(site, link='%s', title='%s', categ='%s', content='%s'):
    """Send the formatted alert message to every alert of the given site

    Argument:
    site -- DATAS structure representing a site to watch

    Keyword arguments:
    link -- link to the content
    title -- for ATOM feed: title of the new article
    categ -- for ATOM feed: category of the new article
    content -- content of the page/new article
    """
    for alert in site.getNodes("alert"):
        destination = alert["server"]
        # Each alert carries its own message template
        text = alert["message"].format(url=site["url"],
                                       link=link,
                                       title=title,
                                       categ=categ,
                                       content=content)
        reply = Response(text,
                         channel=alert["channel"],
                         server=alert["server"])
        send_response(destination, reply)
def alert_change(content, site):
"""Function called when a change is detected on a given site
Arguments:
content -- The new content
site -- DATAS structure representing a site to watch
"""
# "updown" sites: only whether content could be obtained matters, so
# "lastcontent" holds that boolean instead of the content itself.
if site["type"] == "updown":
if site["lastcontent"] is None:
site["lastcontent"] = content is not None
if (content is not None) != site.getBool("lastcontent"):
format_response(site, link=site["url"])
site["lastcontent"] = content is not None
# NOTE(review): this branch returns without calling save()
start_watching(site)
return
# Other types need actual content: without it, only schedule the next check
if content is None:
start_watching(site)
return
if site["type"] == "atom":
# "_lastpage" holds the previously parsed feed; on first use it is built from
# the stored content, or from the new content when nothing is stored yet.
# NOTE(review): this Atom() call is outside the try below, so a parse error
# on the stored content propagates to the caller.
if site["_lastpage"] is None:
if site["lastcontent"] is None or site["lastcontent"] == "":
site["lastcontent"] = content
site["_lastpage"] = Atom(site["lastcontent"])
# NOTE(review): the local name `page` hides the imported `page` module inside
# this function; the bare except also swallows every kind of exception.
try:
page = Atom(content)
except:
print("An error occurs during Atom parsing. Restart event...")
start_watching(site)
return
diff = site["_lastpage"].diff(page)
if len(diff) > 0:
site["_lastpage"] = page
# Send the entries in the reverse of the order diff() returned them
diff.reverse()
for d in diff:
site.setIndex("term", "category")
categories = site.index
# When the site defines categories, the "part" of the matching one is sent;
# the category indexed by "" serves entries with no or an unknown category.
if len(categories) > 0:
if d.category is None or d.category not in categories:
format_response(site, link=d.link, categ=categories[""]["part"], title=d.title)
else:
format_response(site, link=d.link, categ=categories[d.category]["part"], title=d.title)
else:
format_response(site, link=d.link, title=urllib.parse.unquote(d.title))
else:
start_watching(site)
return # Stop here, no changes, so don't save
else: # Just looking for any changes
format_response(site, link=site["url"], content=content)
# A change was reported: remember the new content, schedule the next check
# and persist the data
site["lastcontent"] = content
start_watching(site)
save()
def start_watching(site, offset=0):
    """Create and register the event checking the given site

    The identifier returned by add_event() is stored in site["_evt_id"].

    Argument:
    site -- DATAS structure representing a site to watch

    Keyword argument:
    offset -- offset time to delay the launch of the first check
    """
    def fetch(url):
        # Content getter handed to the event
        return page.render(url, None)

    netloc = urlparse(site["url"], "http").netloc
    print_debug("Add %s event for site: %s" % (site["type"], netloc))

    event = ModuleEvent(func=fetch,
                        cmp_data=site["lastcontent"],
                        func_data=site["url"], offset=offset,
                        interval=site.getInt("time"),
                        call=alert_change, call_data=site)
    site["_evt_id"] = add_event(event)