From 9b38b218989a102ade0966f1834bd56713f665e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?N=C3=A9munaire?=
Date: Sun, 4 Nov 2012 16:26:20 +0100
Subject: [PATCH] Use new web tool in various modules

---
 modules/ddg/DDGSearch.py | 31 ++-------------
 modules/ddg/WFASearch.py | 41 +++++++-----------
 modules/syno.py          | 34 ++-------------
 modules/velib.py         | 36 ++++------------
 modules/velib.xml        |  2 +-
 tools/__init__.py        |  0
 tools/web.py             | 89 +++++++++++++++++++++++++++++-----------
 7 files changed, 96 insertions(+), 137 deletions(-)
 create mode 100644 tools/__init__.py

diff --git a/modules/ddg/DDGSearch.py b/modules/ddg/DDGSearch.py
index 7f96742..2cbcc03 100644
--- a/modules/ddg/DDGSearch.py
+++ b/modules/ddg/DDGSearch.py
@@ -1,19 +1,13 @@
 # coding=utf-8
 
-import http.client
-import re
 from urllib.parse import quote
 
-import xmlparser
+from tools import web
 
 class DDGSearch:
     def __init__(self, terms):
         self.terms = terms
 
-        (res, page) = getPage(terms)
-        if res == http.client.OK or res == http.client.SEE_OTHER:
-            self.ddgres = xmlparser.parse_string(page)
-        else:
-            self.ddgres = None
+        self.ddgres = web.getXML("http://api.duckduckgo.com/?q=%s&format=xml" % quote(terms))
 
     @property
     def type(self):
@@ -55,7 +49,7 @@ class DDGSearch:
     @property
     def answer(self):
         try:
-            return striphtml(self.ddgres.getFirstNode("Answer").getContent())
+            return web.striphtml(self.ddgres.getFirstNode("Answer").getContent())
         except:
             return None
 
@@ -68,22 +62,3 @@ class DDGSearch:
             return None
         except:
             return None
-
-
-def striphtml(data):
-    p = re.compile(r'<.*?>')
-    return p.sub('', data).replace("(", "/(").replace(")", ")/").replace("&quot;", "\"")
-
-def getPage(terms):
-    conn = http.client.HTTPConnection("api.duckduckgo.com", timeout=5)
-    try:
-        conn.request("GET", "/?q=%s&format=xml" % quote(terms))
-    except socket.gaierror:
-        print ("impossible de récupérer la page %s."%(p))
-        return (http.client.INTERNAL_SERVER_ERROR, None)
-
-    res = conn.getresponse()
-    data = res.read()
-
-    conn.close()
-    return (res.status, data)
diff --git a/modules/ddg/WFASearch.py b/modules/ddg/WFASearch.py
index c74296f..c1e9021 100644
--- a/modules/ddg/WFASearch.py
+++ b/modules/ddg/WFASearch.py
@@ -1,19 +1,22 @@
 # coding=utf-8
 
-import http.client
-import re
-import socket
 from urllib.parse import quote
 
-import xmlparser
+from tools import web
 
 class WFASearch:
     def __init__(self, terms):
         self.terms = terms
 
-        (res, page) = getPage(terms)
-        if res == http.client.OK:
-            self.wfares = xmlparser.parse_string(page)
-        else:
+        try:
+            self.wfares = web.getXML("http://api.wolframalpha.com/v2/query?"
+                                     "input=%s&appid=%s"
+                                     % (quote(terms),
+                                        CONF.getNode("wfaapi")["key"]))
+        except (TypeError, KeyError):
+            print ("You need a Wolfram|Alpha API key in order to use this "
+                   "module. Add it to the module configuration file:\n\nRegister at "
+                   "http://products.wolframalpha.com/api/")
             self.wfares = None
 
@@ -25,7 +28,9 @@
 
     @property
     def error(self):
-        if self.wfares["error"] == "true":
+        if self.wfares is None:
+            return "An error occurs during computation."
+        elif self.wfares["error"] == "true":
             return "An error occurs during computation: " + self.wfares.getNode("error").getNode("msg").getContent()
         elif self.wfares.hasNode("didyoumeans"):
             start = "Did you mean: "
@@ -62,21 +67,3 @@
                     yield node["title"] + " " + subnode["title"] + ": " + subnode.getFirstNode("plaintext").getContent()
         except IndexError:
             pass
-
-
-def getPage(terms):
-    conn = http.client.HTTPConnection("api.wolframalpha.com", timeout=15)
-    try:
-        conn.request("GET", "/v2/query?input=%s&appid=%s" % (quote(terms), CONF.getNode("wfaapi")["key"]))
-    except socket.gaierror:
-        print ("impossible de récupérer la page Wolfram|Alpha.")
-        return (http.client.INTERNAL_SERVER_ERROR, None)
-    except (TypeError, KeyError):
-        print ("You need a Wolfram|Alpha API key in order to use this module. Add it to the module configuration file:\n\nRegister at http://products.wolframalpha.com/api/")
-        return (http.client.INTERNAL_SERVER_ERROR, None)
-
-    res = conn.getresponse()
-    data = res.read()
-
-    conn.close()
-    return (res.status, data)
diff --git a/modules/syno.py b/modules/syno.py
index 8820dc0..aee5a50 100644
--- a/modules/syno.py
+++ b/modules/syno.py
@@ -1,10 +1,10 @@
 # coding=utf-8
 
-import http.client
 import re
-import socket
 from urllib.parse import quote
 
+from tools import web
+
 nemubotversion = 3.3
 
 def help_tiny ():
@@ -38,8 +38,8 @@ def cmd_syno(msg):
 
 
 def get_synos(word):
-    (res, page) = getPage(word)
-    if res == http.client.OK:
+    page = web.getURLContent("http://www.crisco.unicaen.fr/des/synonymes/%s" % quote(word))
+    if page is not None:
         synos = list()
         for line in page.decode().split("\n"):
             if re.match("[ \t]*]*>.*[ \t]*.*", line) is not None:
@@ -48,29 +48,3 @@ def get_synos(word):
         return synos
     else:
         return None
-
-
-def getPage(terms):
-    conn = http.client.HTTPConnection("www.crisco.unicaen.fr", timeout=5)
-    try:
-        conn.request("GET", "/des/synonymes/%s" % quote(terms))
-    except socket.gaierror:
-        print ("impossible de récupérer la page Wolfram|Alpha.")
-        return (http.client.INTERNAL_SERVER_ERROR, None)
-
-    res = conn.getresponse()
-    data = res.read()
-
-    conn.close()
-    return (res.status, data)
-
-
-if __name__ == "__main__":
-    import sys
-    if len(sys.argv) == 0:
-        print ("Usage: ./syno.py word [word ...]")
-    else:
-        for word in sys.argv:
-            synos = get_synos(word)
-            if synos is not None:
-                print ("Synonyme de %s : %s" % (word, ', '.join(synos)))
diff --git a/modules/velib.py b/modules/velib.py
index 72a8985..8385476 100644
--- a/modules/velib.py
+++ b/modules/velib.py
@@ -1,11 +1,8 @@
 # coding=utf-8
 
-import http.client
 import re
-from xml.dom.minidom import parseString
 
-from event import ModuleEvent
-from xmlparser.node import ModuleState
+from tools import web
 
 nemubotversion = 3.3
 
@@ -26,33 +23,18 @@ def help_full ():
     return "!velib /number/ ...: gives available bikes and slots at the station /number/."
 
 
-def getPage (s, p):
-    conn = http.client.HTTPConnection(s, timeout=10)
-    try:
-        conn.request("GET", p)
-    except socket.gaierror:
-        print ("[%s] impossible de récupérer la page %s."%(s, p))
-        return None
-
-    res = conn.getresponse()
-    data = res.read()
-
-    conn.close()
-    return (res.status, data)
-
 def station_status(station):
     """Gets available and free status of a given station"""
-    (st, page) = getPage(CONF.getNode("server")["ip"], CONF.getNode("server")["url"] + station)
-    if st == http.client.OK:
-        response = parseString(page)
-        available = response.documentElement.getElementsByTagName("available")
-        if len(available) > 0:
-            available = int(available[0].childNodes[0].nodeValue)
+    response = web.getXML(CONF.getNode("server")["url"] + station)
+    if response is not None:
+        available = response.getNode("available").getContent()
+        if available is not None and len(available) > 0:
+            available = int(available)
         else:
             available = 0
-        free = response.documentElement.getElementsByTagName("free")
-        if len(free) > 0:
-            free = int(free[0].childNodes[0].nodeValue)
+        free = response.getNode("free").getContent()
+        if free is not None and len(free) > 0:
+            free = int(free)
         else:
             free = 0
         return (available, free)
diff --git a/modules/velib.xml b/modules/velib.xml
index 112fa7f..61d8a5e 100644
--- a/modules/velib.xml
+++ b/modules/velib.xml
@@ -1,5 +1,5 @@
-
+
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tools/web.py b/tools/web.py
index 5f3f4d3..bec4bf8 100644
--- a/tools/web.py
+++ b/tools/web.py
@@ -18,18 +18,30 @@
 import http.client
 import re
+import socket
 from urllib.parse import quote
+import xmlparser
 
+
+# Parse URL
+
 def parseURL(url):
     """Separate protocol, domain, port and page request"""
     res = re.match("^(([^:]+)://)?([^:/]+)(:([0-9]{1,5}))?(.*)$", url)
     if res is not None:
-        port = res.group(5)
-        if port is None and res.group(2) is not None:
+        if res.group(5) is not None:
+            port = int(res.group(5))
+        elif res.group(2) is not None:
             if res.group(2) == "http":
                 port = 80
             elif res.group(2) == "https":
                 port = 443
+            else:
+                print (" WARNING: unknown protocol %s"
+                       % res.group(2))
+                port = 0
+        else:
+            port = 0
         return (res.group(2), res.group(3), port, res.group(6))
     else:
         return (None, None, None, None)
 
@@ -40,38 +52,67 @@ def getDomain(url):
     return domain
 
 def getProtocol(url):
-    """Return the domain of a given URL"""
+    """Return the protocol of a given URL"""
     (protocol, domain, port, page) = parseURL(url)
     return protocol
 
-def getURL(url):
+def getPort(url):
+    """Return the port of a given URL"""
+    (protocol, domain, port, page) = parseURL(url)
+    return port
+
+def getRequest(url):
+    """Return the page request of a given URL"""
+    (protocol, domain, port, page) = parseURL(url)
+    return page
+
+
+# Get real pages
+
+def getURLContent(url, timeout=15):
     """Return page content corresponding to URL or None if any error occurs"""
-    conn = http.client.HTTPConnection("api.duckduckgo.com", timeout=5)
+    (protocol, domain, port, page) = parseURL(url)
+    if port == 0: port = 80
+    conn = http.client.HTTPConnection(domain, port=port, timeout=timeout)
     try:
-        conn.request("GET", "/?q=%s&format=xml" % quote(terms))
+        conn.request("GET", page, None, {"User-agent": "Nemubot v3"})
     except socket.gaierror:
-        print ("impossible de récupérer la page %s."%(p))
-        return (http.client.INTERNAL_SERVER_ERROR, None)
+        print (" Unable to retrieve page %s from %s on %d."
+               % (page, domain, port))
+        return None
 
     res = conn.getresponse()
     data = res.read()
 
     conn.close()
-    return (res.status, data)
+    if res.status == http.client.OK or res.status == http.client.SEE_OTHER:
+        return data
+    #TODO: follow redirections
+    else:
+        return None
+
+def getXML(url, timeout=15):
+    """Get content page and return XML parsed content"""
+    cnt = getURLContent(url, timeout)
+    if cnt is None:
+        return None
+    else:
+        return xmlparser.parse_string(cnt)
+
+
+# Other utils
+
+def striphtml(data):
+    """Remove HTML tags from text"""
+    p = re.compile(r'<.*?>')
+    return p.sub('', data).replace("(", "/(").replace(")", ")/").replace("&quot;", "\"")
+
+
+# Tests when called alone
 
 if __name__ == "__main__":
-    content1 = ""
-    with open("rss.php.1", "r") as f:
-        for line in f:
-            content1 += line
-    content2 = ""
-    with open("rss.php", "r") as f:
-        for line in f:
-            content2 += line
-    a = Atom (content1)
-    print (a.updated)
-    b = Atom (content2)
-    print (b.updated)
-
-    diff = a.diff (b)
-    print (diff)
+    print(parseURL("www.nemunai.re"))
+    print(parseURL("www.nemunai.re/?p0m"))
+    print(parseURL("http://www.nemunai.re/?p0m"))
+    print(parseURL("http://www.nemunai.re:42/?p0m"))
+    print(parseURL("www.nemunai.re:42/?p0m"))
+    print(parseURL("http://www.nemunai.re/?p0m"))