WatchWebsite module now uses urllib and can group watches of the same page across multiple channels/IRC networks
This commit is contained in:
parent
1693488a56
commit
cb0056b605
@ -7,8 +7,9 @@ import hashlib
|
|||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import urllib.parse
|
||||||
from urllib.parse import unquote
|
from urllib.parse import urlparse
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
from .atom import Atom
|
from .atom import Atom
|
||||||
|
|
||||||
@ -23,51 +24,60 @@ def help_full ():
|
|||||||
|
|
||||||
def load(context):
|
def load(context):
|
||||||
"""Register watched website"""
|
"""Register watched website"""
|
||||||
|
DATAS.setIndex("url", "watch")
|
||||||
for site in DATAS.getNodes("watch"):
|
for site in DATAS.getNodes("watch"):
|
||||||
start_watching(site)
|
start_watching(site)
|
||||||
|
|
||||||
def unload(context):
|
def unload(context):
|
||||||
"""Unregister watched website"""
|
"""Unregister watched website"""
|
||||||
for site in DATAS.getNodes("watch"):
|
# Useless in 3.3?
|
||||||
context.del_event(site["evt_id"])
|
# for site in DATAS.getNodes("watch"):
|
||||||
|
# context.del_event(site["evt_id"])
|
||||||
|
pass
|
||||||
|
|
||||||
|
def getPageContent(url):
|
||||||
|
"""Returns the content of the given url"""
|
||||||
|
print_debug("Get page %s" % url)
|
||||||
|
raw = urlopen(url, timeout=15)
|
||||||
|
return raw.read().decode()
|
||||||
|
|
||||||
def start_watching(site):
|
def start_watching(site):
|
||||||
print_debug("Add event for site: http://%s%s" % (site["server"], site["page"]))
|
o = urlparse(site["url"], "http")
|
||||||
evt = ModuleEvent(func=getPage, cmp_data=site["lastcontent"],
|
print_debug("Add event for site: %s" % o.netloc)
|
||||||
func_data=dict(s=site["server"], p=site["page"]),
|
evt = ModuleEvent(func=getPageContent, cmp_data=site["lastcontent"],
|
||||||
|
func_data=site["url"],
|
||||||
intervalle=site.getInt("time"),
|
intervalle=site.getInt("time"),
|
||||||
call=alert_change, call_data=site)
|
call=alert_change, call_data=site)
|
||||||
site["evt_id"] = add_event(evt)
|
site["_evt_id"] = add_event(evt)
|
||||||
|
|
||||||
|
|
||||||
def explore_url(url):
|
|
||||||
return re.match("^(http://)?([^/:]+)(/.*)$", url)
|
|
||||||
|
|
||||||
def found_site(s, p):
|
|
||||||
for site in DATAS.getNodes("watch"):
|
|
||||||
if site is not None and site["server"] == s and site["page"] == p:
|
|
||||||
return site
|
|
||||||
return None
|
|
||||||
|
|
||||||
def del_site(msg):
|
def del_site(msg):
|
||||||
if len(msg.cmds) <= 1:
|
if len(msg.cmds) <= 1:
|
||||||
return Response(msg.sender, "quel site dois-je arrêter de surveiller ?",
|
return Response(msg.sender, "quel site dois-je arrêter de surveiller ?",
|
||||||
msg.channel, msg.nick)
|
msg.channel, msg.nick)
|
||||||
|
|
||||||
rx = explore_url(msg.cmds[1])
|
url = msg.cmds[1]
|
||||||
if rx is not None:
|
|
||||||
site = found_site(rx.group(2), rx.group(3))
|
o = urlparse(url, "http")
|
||||||
if site is not None and (msg.sender == site["sender"] or msg.is_owner):
|
if o.scheme != "" and url in DATAS.index:
|
||||||
del_event(site["evt_id"])
|
site = DATAS.index[url]
|
||||||
|
for a in site.getNodes("alert"):
|
||||||
|
if a["channel"] == msg.channel:
|
||||||
|
if (msg.sender == a["sender"] or msg.is_owner):
|
||||||
|
site.delChild(a)
|
||||||
|
if not site.hasNode("alert"):
|
||||||
|
del_event(site["_evt_id"])
|
||||||
DATAS.delChild(site)
|
DATAS.delChild(site)
|
||||||
save()
|
save()
|
||||||
return Response(msg.sender, "je ne surveille désormais plus cette URL.",
|
return Response(msg.sender,
|
||||||
channel=msg.channel, nick=msg.nick)
|
"je ne surveille désormais plus cette URL.",
|
||||||
elif site is None:
|
|
||||||
return Response(msg.sender, "je ne surveillais pas cette URL, impossible de la supprimer.",
|
|
||||||
channel=msg.channel, nick=msg.nick)
|
channel=msg.channel, nick=msg.nick)
|
||||||
else:
|
else:
|
||||||
return Response(msg.sender, "Vous ne pouvez pas supprimer cette URL.",
|
return Response(msg.sender,
|
||||||
|
"Vous ne pouvez pas supprimer cette URL.",
|
||||||
|
channel=msg.channel, nick=msg.nick)
|
||||||
|
return Response(msg.sender,
|
||||||
|
"je ne surveillais pas cette URL, impossible de la supprimer.",
|
||||||
channel=msg.channel, nick=msg.nick)
|
channel=msg.channel, nick=msg.nick)
|
||||||
return Response(msg.sender, "je ne surveillais pas cette URL pour vous.",
|
return Response(msg.sender, "je ne surveillais pas cette URL pour vous.",
|
||||||
channel=msg.channel, nick=msg.nick)
|
channel=msg.channel, nick=msg.nick)
|
||||||
@ -77,28 +87,44 @@ def add_site(msg):
|
|||||||
return Response(msg.sender, "quel site dois-je surveiller ?",
|
return Response(msg.sender, "quel site dois-je surveiller ?",
|
||||||
msg.channel, msg.nick)
|
msg.channel, msg.nick)
|
||||||
|
|
||||||
rx = explore_url(msg.cmds[1])
|
url = msg.cmds[1]
|
||||||
if rx is None:
|
|
||||||
|
o = urlparse(url, "http")
|
||||||
|
if o.netloc != "":
|
||||||
|
alert = ModuleState("alert")
|
||||||
|
alert["sender"] = msg.sender
|
||||||
|
alert["server"] = msg.server
|
||||||
|
alert["channel"] = msg.channel
|
||||||
|
alert["message"] = "%s a changé !" % url
|
||||||
|
|
||||||
|
if url not in DATAS.index:
|
||||||
|
watch = ModuleState("watch")
|
||||||
|
watch["type"] = "diff"
|
||||||
|
watch["url"] = url
|
||||||
|
watch["time"] = 123
|
||||||
|
DATAS.addChild(watch)
|
||||||
|
watch.addChild(alert)
|
||||||
|
start_watching(watch)
|
||||||
|
else:
|
||||||
|
DATAS.index[url].addChild(alert)
|
||||||
|
else:
|
||||||
return Response(msg.sender, "je ne peux pas surveiller cette URL",
|
return Response(msg.sender, "je ne peux pas surveiller cette URL",
|
||||||
channel=msg.channel, nick=msg.nick)
|
channel=msg.channel, nick=msg.nick)
|
||||||
else:
|
|
||||||
watch = ModuleState("watch")
|
|
||||||
watch["sender"] = msg.sender
|
|
||||||
watch["irc"] = msg.srv.id
|
|
||||||
watch["channel"] = msg.channel
|
|
||||||
watch["type"] = "diff"
|
|
||||||
watch["server"] = rx.group(2)
|
|
||||||
watch["page"] = rx.group(3)
|
|
||||||
watch["time"] = 123
|
|
||||||
watch["message"] = "http://%s%s a changé !" % (watch["server"],
|
|
||||||
watch["page"])
|
|
||||||
DATAS.addChild(watch)
|
|
||||||
start_watching(watch)
|
|
||||||
|
|
||||||
save()
|
save()
|
||||||
return Response(msg.sender, channel=msg.channel, nick=msg.nick,
|
return Response(msg.sender, channel=msg.channel, nick=msg.nick,
|
||||||
message="ce site est maintenant sous ma surveillance.")
|
message="ce site est maintenant sous ma surveillance.")
|
||||||
|
|
||||||
|
def format_response(site, data1='%s', data2='%s', data3='%s', data4='%s'):
|
||||||
|
for a in site.getNodes("alert"):
|
||||||
|
if a["message"].count("%s") == 1: data = data1
|
||||||
|
elif a["message"].count("%s") == 2: data = (data2, data1)
|
||||||
|
elif a["message"].count("%s") == 3: data = (data3, data2, data1)
|
||||||
|
elif a["message"].count("%s") == 4: data = (data4, data3, data2, data1)
|
||||||
|
else: data = ()
|
||||||
|
send_response(a["server"], Response(a["sender"], a["message"] % data,
|
||||||
|
channel=a["channel"], server=a["server"]))
|
||||||
|
|
||||||
def alert_change(content, site):
|
def alert_change(content, site):
|
||||||
"""Alert when a change is detected"""
|
"""Alert when a change is detected"""
|
||||||
if content is None:
|
if content is None:
|
||||||
@ -120,55 +146,25 @@ def alert_change(content, site):
|
|||||||
diff = site["_lastpage"].diff(page)
|
diff = site["_lastpage"].diff(page)
|
||||||
if len(diff) > 0:
|
if len(diff) > 0:
|
||||||
site["_lastpage"] = page
|
site["_lastpage"] = page
|
||||||
print_debug("[%s] Page differ!" % site["server"])
|
print_debug("[%s] Page differ!" % getHost(site["url"]))
|
||||||
diff.reverse()
|
diff.reverse()
|
||||||
for d in diff:
|
for d in diff:
|
||||||
site.setIndex("term", "category")
|
site.setIndex("term", "category")
|
||||||
categories = site.index
|
categories = site.index
|
||||||
|
|
||||||
if site["message"].count("%s") == 2 and len(categories) > 0:
|
if len(categories) > 0:
|
||||||
if d.category is None or d.category not in categories:
|
if d.category is None or d.category not in categories:
|
||||||
messageI = site["message"] % (categories[""]["part"], "%s")
|
format_response(site, link, categories[""]["part"])
|
||||||
else:
|
else:
|
||||||
messageI = site["message"] % (categories[d.category]["part"], "%s")
|
format_response(site, link, categories[d.category]["part"])
|
||||||
send_response(site["irc"], Response(site["sender"],
|
|
||||||
messageI % d.link,
|
|
||||||
site["channel"]))
|
|
||||||
elif site["message"].count("%s") == 2:
|
|
||||||
send_response(site["irc"], Response(site["sender"],
|
|
||||||
site["message"] % (unquote(d.title), d.link),
|
|
||||||
site["channel"]))
|
|
||||||
elif site["message"].count("%s") == 1:
|
|
||||||
send_response(site["irc"], Response(site["sender"],
|
|
||||||
site["message"] % unquote (d.title),
|
|
||||||
site["channel"]))
|
|
||||||
else:
|
else:
|
||||||
send_response(site["irc"], Response(site["sender"],
|
format_response(site, link, urllib.parse.unquote(d.title))
|
||||||
site["message"],
|
|
||||||
site["channel"]))
|
|
||||||
else:
|
else:
|
||||||
start_watching(site)
|
start_watching(site)
|
||||||
return #Stop here, no changes, so don't save
|
return #Stop here, no changes, so don't save
|
||||||
|
|
||||||
else: # Just looking for any changes
|
else: # Just looking for any changes
|
||||||
send_response(site["irc"], Response(site["sender"], site["message"], site["channel"]))
|
format_response(site, site["url"])
|
||||||
site["lastcontent"] = content
|
site["lastcontent"] = content
|
||||||
start_watching(site)
|
start_watching(site)
|
||||||
save()
|
save()
|
||||||
|
|
||||||
#TODO: built-in this function
|
|
||||||
def getPage(s, p):
|
|
||||||
"""Return the page content"""
|
|
||||||
print_debug("Looking http://%s%s"%(s,p))
|
|
||||||
conn = http.client.HTTPConnection(s, timeout=10)
|
|
||||||
try:
|
|
||||||
conn.request("GET", p)
|
|
||||||
|
|
||||||
res = conn.getresponse()
|
|
||||||
data = res.read()
|
|
||||||
except:
|
|
||||||
print ("[%s] impossible de récupérer la page %s."%(s, p))
|
|
||||||
return None
|
|
||||||
|
|
||||||
conn.close()
|
|
||||||
return data.decode()
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user