WatchWebsite module uses urllib and can group watches of the same page across multiple channels/IRC networks

This commit is contained in:
Némunaire 2012-11-07 18:55:17 +01:00
parent 1693488a56
commit cb0056b605

View File

@ -7,8 +7,9 @@ import hashlib
import re import re
import socket import socket
import sys import sys
import traceback import urllib.parse
from urllib.parse import unquote from urllib.parse import urlparse
from urllib.request import urlopen
from .atom import Atom from .atom import Atom
@ -23,51 +24,60 @@ def help_full ():
def load(context): def load(context):
"""Register watched website""" """Register watched website"""
DATAS.setIndex("url", "watch")
for site in DATAS.getNodes("watch"): for site in DATAS.getNodes("watch"):
start_watching(site) start_watching(site)
def unload(context): def unload(context):
"""Unregister watched website""" """Unregister watched website"""
for site in DATAS.getNodes("watch"): # Useless in 3.3?
context.del_event(site["evt_id"]) # for site in DATAS.getNodes("watch"):
# context.del_event(site["evt_id"])
pass
def getPageContent(url):
    """Returns the content of the given url

    The body is decoded using the charset declared in the response
    headers, falling back to UTF-8 when the server declares none.
    Exceptions raised by urlopen (network failure, timeout, HTTP error)
    are not caught here and propagate to the caller.
    """
    print_debug("Get page %s" % url)
    # Use the response as a context manager so the underlying socket is
    # always released, even if read() or decode() raises.
    with urlopen(url, timeout=15) as raw:
        charset = raw.headers.get_content_charset() or "utf-8"
        return raw.read().decode(charset)
def start_watching(site): def start_watching(site):
print_debug("Add event for site: http://%s%s" % (site["server"], site["page"])) o = urlparse(site["url"], "http")
evt = ModuleEvent(func=getPage, cmp_data=site["lastcontent"], print_debug("Add event for site: %s" % o.netloc)
func_data=dict(s=site["server"], p=site["page"]), evt = ModuleEvent(func=getPageContent, cmp_data=site["lastcontent"],
func_data=site["url"],
intervalle=site.getInt("time"), intervalle=site.getInt("time"),
call=alert_change, call_data=site) call=alert_change, call_data=site)
site["evt_id"] = add_event(evt) site["_evt_id"] = add_event(evt)
def explore_url(url):
    """Split *url* into its scheme, host and path parts.

    Returns a match object whose groups are: (1) the optional leading
    "http://", (2) the host (no "/" or ":" allowed), (3) the path,
    which must start with "/".  Returns None when *url* does not have
    that shape (e.g. no path, an explicit port, or another scheme).
    """
    pattern = re.compile("^(http://)?([^/:]+)(/.*)$")
    return pattern.match(url)
def found_site(s, p):
    """Look up the watched site registered for server *s* and page *p*.

    Scans the "watch" nodes of DATAS in order and returns the first one
    whose "server" and "page" entries equal *s* and *p*; returns None
    when no node matches.
    """
    matches = (
        node for node in DATAS.getNodes("watch")
        if node is not None and node["server"] == s and node["page"] == p
    )
    return next(matches, None)
def del_site(msg): def del_site(msg):
if len(msg.cmds) <= 1: if len(msg.cmds) <= 1:
return Response(msg.sender, "quel site dois-je arrêter de surveiller ?", return Response(msg.sender, "quel site dois-je arrêter de surveiller ?",
msg.channel, msg.nick) msg.channel, msg.nick)
rx = explore_url(msg.cmds[1]) url = msg.cmds[1]
if rx is not None:
site = found_site(rx.group(2), rx.group(3)) o = urlparse(url, "http")
if site is not None and (msg.sender == site["sender"] or msg.is_owner): if o.scheme != "" and url in DATAS.index:
del_event(site["evt_id"]) site = DATAS.index[url]
for a in site.getNodes("alert"):
if a["channel"] == msg.channel:
if (msg.sender == a["sender"] or msg.is_owner):
site.delChild(a)
if not site.hasNode("alert"):
del_event(site["_evt_id"])
DATAS.delChild(site) DATAS.delChild(site)
save() save()
return Response(msg.sender, "je ne surveille désormais plus cette URL.", return Response(msg.sender,
channel=msg.channel, nick=msg.nick) "je ne surveille désormais plus cette URL.",
elif site is None:
return Response(msg.sender, "je ne surveillais pas cette URL, impossible de la supprimer.",
channel=msg.channel, nick=msg.nick) channel=msg.channel, nick=msg.nick)
else: else:
return Response(msg.sender, "Vous ne pouvez pas supprimer cette URL.", return Response(msg.sender,
"Vous ne pouvez pas supprimer cette URL.",
channel=msg.channel, nick=msg.nick)
return Response(msg.sender,
"je ne surveillais pas cette URL, impossible de la supprimer.",
channel=msg.channel, nick=msg.nick) channel=msg.channel, nick=msg.nick)
return Response(msg.sender, "je ne surveillais pas cette URL pour vous.", return Response(msg.sender, "je ne surveillais pas cette URL pour vous.",
channel=msg.channel, nick=msg.nick) channel=msg.channel, nick=msg.nick)
@ -77,28 +87,44 @@ def add_site(msg):
return Response(msg.sender, "quel site dois-je surveiller ?", return Response(msg.sender, "quel site dois-je surveiller ?",
msg.channel, msg.nick) msg.channel, msg.nick)
rx = explore_url(msg.cmds[1]) url = msg.cmds[1]
if rx is None:
o = urlparse(url, "http")
if o.netloc != "":
alert = ModuleState("alert")
alert["sender"] = msg.sender
alert["server"] = msg.server
alert["channel"] = msg.channel
alert["message"] = "%s a changé !" % url
if url not in DATAS.index:
watch = ModuleState("watch")
watch["type"] = "diff"
watch["url"] = url
watch["time"] = 123
DATAS.addChild(watch)
watch.addChild(alert)
start_watching(watch)
else:
DATAS.index[url].addChild(alert)
else:
return Response(msg.sender, "je ne peux pas surveiller cette URL", return Response(msg.sender, "je ne peux pas surveiller cette URL",
channel=msg.channel, nick=msg.nick) channel=msg.channel, nick=msg.nick)
else:
watch = ModuleState("watch")
watch["sender"] = msg.sender
watch["irc"] = msg.srv.id
watch["channel"] = msg.channel
watch["type"] = "diff"
watch["server"] = rx.group(2)
watch["page"] = rx.group(3)
watch["time"] = 123
watch["message"] = "http://%s%s a changé !" % (watch["server"],
watch["page"])
DATAS.addChild(watch)
start_watching(watch)
save() save()
return Response(msg.sender, channel=msg.channel, nick=msg.nick, return Response(msg.sender, channel=msg.channel, nick=msg.nick,
message="ce site est maintenant sous ma surveillance.") message="ce site est maintenant sous ma surveillance.")
def format_response(site, data1='%s', data2='%s', data3='%s', data4='%s'):
    """Send every alert registered on *site*, filling in its message.

    Each "alert" child of *site* carries a "message" template.  The
    number of "%s" placeholders in the template decides how many of the
    data arguments are used; they are substituted in reverse order, so
    the LAST placeholder always receives *data1*, the one before it
    *data2*, and so on.

    A template with no placeholder, or with more than four, is sent
    verbatim: applying the % operator to it would raise as soon as it
    contains a literal "%" (for instance a URL with "%20" in it) or has
    more placeholders than available data.
    """
    values = (data1, data2, data3, data4)
    for a in site.getNodes("alert"):
        message = a["message"]
        count = message.count("%s")
        if 1 <= count <= len(values):
            # Reverse so that data1 lands on the last placeholder.
            message = message % tuple(reversed(values[:count]))
        send_response(a["server"], Response(a["sender"], message,
                                            channel=a["channel"],
                                            server=a["server"]))
def alert_change(content, site): def alert_change(content, site):
"""Alert when a change is detected""" """Alert when a change is detected"""
if content is None: if content is None:
@ -120,55 +146,25 @@ def alert_change(content, site):
diff = site["_lastpage"].diff(page) diff = site["_lastpage"].diff(page)
if len(diff) > 0: if len(diff) > 0:
site["_lastpage"] = page site["_lastpage"] = page
print_debug("[%s] Page differ!" % site["server"]) print_debug("[%s] Page differ!" % getHost(site["url"]))
diff.reverse() diff.reverse()
for d in diff: for d in diff:
site.setIndex("term", "category") site.setIndex("term", "category")
categories = site.index categories = site.index
if site["message"].count("%s") == 2 and len(categories) > 0: if len(categories) > 0:
if d.category is None or d.category not in categories: if d.category is None or d.category not in categories:
messageI = site["message"] % (categories[""]["part"], "%s") format_response(site, link, categories[""]["part"])
else: else:
messageI = site["message"] % (categories[d.category]["part"], "%s") format_response(site, link, categories[d.category]["part"])
send_response(site["irc"], Response(site["sender"],
messageI % d.link,
site["channel"]))
elif site["message"].count("%s") == 2:
send_response(site["irc"], Response(site["sender"],
site["message"] % (unquote(d.title), d.link),
site["channel"]))
elif site["message"].count("%s") == 1:
send_response(site["irc"], Response(site["sender"],
site["message"] % unquote (d.title),
site["channel"]))
else: else:
send_response(site["irc"], Response(site["sender"], format_response(site, link, urllib.parse.unquote(d.title))
site["message"],
site["channel"]))
else: else:
start_watching(site) start_watching(site)
return #Stop here, no changes, so don't save return #Stop here, no changes, so don't save
else: # Just looking for any changes else: # Just looking for any changes
send_response(site["irc"], Response(site["sender"], site["message"], site["channel"])) format_response(site, site["url"])
site["lastcontent"] = content site["lastcontent"] = content
start_watching(site) start_watching(site)
save() save()
#TODO: make this function a built-in
def getPage(s, p):
    """Return the page content

    Fetches path *p* from host *s* over plain HTTP with a 10 second
    timeout and returns the body decoded as text.  Returns None (after
    printing a message) when the request fails at the network or HTTP
    protocol level.
    """
    print_debug("Looking http://%s%s"%(s,p))
    conn = http.client.HTTPConnection(s, timeout=10)
    try:
        conn.request("GET", p)
        data = conn.getresponse().read()
    except (OSError, http.client.HTTPException):
        # Only network/protocol failures are expected here; anything
        # else (KeyboardInterrupt, programming errors) must propagate.
        print ("[%s] impossible de récupérer la page %s."%(s, p))
        return None
    finally:
        # Release the socket on the failure path as well.
        conn.close()
    return data.decode()