diff --git a/modules/ddg/DDGSearch.py b/modules/ddg/DDGSearch.py
index 7f96742..2cbcc03 100644
--- a/modules/ddg/DDGSearch.py
+++ b/modules/ddg/DDGSearch.py
@@ -1,19 +1,13 @@
# coding=utf-8
-import http.client
-import re
from urllib.parse import quote
-import xmlparser
+from tools import web
class DDGSearch:
def __init__(self, terms):
self.terms = terms
- (res, page) = getPage(terms)
- if res == http.client.OK or res == http.client.SEE_OTHER:
- self.ddgres = xmlparser.parse_string(page)
- else:
- self.ddgres = None
+ self.ddgres = web.getXML("http://api.duckduckgo.com/?q=%s&format=xml" % quote(terms))
@property
def type(self):
@@ -55,7 +49,7 @@ class DDGSearch:
@property
def answer(self):
try:
- return striphtml(self.ddgres.getFirstNode("Answer").getContent())
+ return web.striphtml(self.ddgres.getFirstNode("Answer").getContent())
except:
return None
@@ -68,22 +62,3 @@ class DDGSearch:
return None
except:
return None
-
-
-def striphtml(data):
- p = re.compile(r'<.*?>')
- return p.sub('', data).replace("(", "/(").replace(")", ")/").replace("&quot;", "\"")
-
-def getPage(terms):
- conn = http.client.HTTPConnection("api.duckduckgo.com", timeout=5)
- try:
- conn.request("GET", "/?q=%s&format=xml" % quote(terms))
- except socket.gaierror:
- print ("impossible de récupérer la page %s."%(p))
- return (http.client.INTERNAL_SERVER_ERROR, None)
-
- res = conn.getresponse()
- data = res.read()
-
- conn.close()
- return (res.status, data)
diff --git a/modules/ddg/WFASearch.py b/modules/ddg/WFASearch.py
index c74296f..c1e9021 100644
--- a/modules/ddg/WFASearch.py
+++ b/modules/ddg/WFASearch.py
@@ -1,19 +1,22 @@
# coding=utf-8
-import http.client
-import re
-import socket
from urllib.parse import quote
-import xmlparser
+from tools import web
class WFASearch:
def __init__(self, terms):
self.terms = terms
- (res, page) = getPage(terms)
- if res == http.client.OK:
- self.wfares = xmlparser.parse_string(page)
- else:
+ try:
+ self.wfares = web.getXML("http://api.wolframalpha.com/v2/query?"
+ "input=%s&appid=%s"
+ % (quote(terms),
+ CONF.getNode("wfaapi")["key"]))
+ except (TypeError, KeyError):
+ print ("You need a Wolfram|Alpha API key in order to use this "
+ "module. Add it to the module configuration file:\n\nRegister at "
+ "http://products.wolframalpha.com/api/")
self.wfares = None
@property
@@ -25,7 +28,9 @@ class WFASearch:
@property
def error(self):
- if self.wfares["error"] == "true":
+ if self.wfares is None:
+ return "An error occurs during computation."
+ elif self.wfares["error"] == "true":
return "An error occurs during computation: " + self.wfares.getNode("error").getNode("msg").getContent()
elif self.wfares.hasNode("didyoumeans"):
start = "Did you mean: "
@@ -62,21 +67,3 @@ class WFASearch:
yield node["title"] + " " + subnode["title"] + ": " + subnode.getFirstNode("plaintext").getContent()
except IndexError:
pass
-
-
-def getPage(terms):
- conn = http.client.HTTPConnection("api.wolframalpha.com", timeout=15)
- try:
- conn.request("GET", "/v2/query?input=%s&appid=%s" % (quote(terms), CONF.getNode("wfaapi")["key"]))
- except socket.gaierror:
- print ("impossible de récupérer la page Wolfram|Alpha.")
- return (http.client.INTERNAL_SERVER_ERROR, None)
- except (TypeError, KeyError):
- print ("You need a Wolfram|Alpha API key in order to use this module. Add it to the module configuration file:\n\nRegister at http://products.wolframalpha.com/api/")
- return (http.client.INTERNAL_SERVER_ERROR, None)
-
- res = conn.getresponse()
- data = res.read()
-
- conn.close()
- return (res.status, data)
diff --git a/modules/syno.py b/modules/syno.py
index 8820dc0..aee5a50 100644
--- a/modules/syno.py
+++ b/modules/syno.py
@@ -1,10 +1,10 @@
# coding=utf-8
-import http.client
import re
-import socket
from urllib.parse import quote
+from tools import web
+
nemubotversion = 3.3
def help_tiny ():
@@ -38,8 +38,8 @@ def cmd_syno(msg):
def get_synos(word):
- (res, page) = getPage(word)
- if res == http.client.OK:
+ page = web.getURLContent("http://www.crisco.unicaen.fr/des/synonymes/%s" % quote(word))
+ if page is not None:
synos = list()
for line in page.decode().split("\n"):
if re.match("[ \t]*<tr[^>]*>.*</tr>[ \t]*.*", line) is not None:
@@ -48,29 +48,3 @@ def get_synos(word):
return synos
else:
return None
-
-
-def getPage(terms):
- conn = http.client.HTTPConnection("www.crisco.unicaen.fr", timeout=5)
- try:
- conn.request("GET", "/des/synonymes/%s" % quote(terms))
- except socket.gaierror:
- print ("impossible de récupérer la page Wolfram|Alpha.")
- return (http.client.INTERNAL_SERVER_ERROR, None)
-
- res = conn.getresponse()
- data = res.read()
-
- conn.close()
- return (res.status, data)
-
-
-if __name__ == "__main__":
- import sys
- if len(sys.argv) == 0:
- print ("Usage: ./syno.py word [word ...]")
- else:
- for word in sys.argv:
- synos = get_synos(word)
- if synos is not None:
- print ("Synonyme de %s : %s" % (word, ', '.join(synos)))
diff --git a/modules/velib.py b/modules/velib.py
index 72a8985..8385476 100644
--- a/modules/velib.py
+++ b/modules/velib.py
@@ -1,11 +1,8 @@
# coding=utf-8
-import http.client
import re
-from xml.dom.minidom import parseString
-from event import ModuleEvent
-from xmlparser.node import ModuleState
+from tools import web
nemubotversion = 3.3
@@ -26,33 +23,18 @@ def help_full ():
return "!velib /number/ ...: gives available bikes and slots at the station /number/."
-def getPage (s, p):
- conn = http.client.HTTPConnection(s, timeout=10)
- try:
- conn.request("GET", p)
- except socket.gaierror:
- print ("[%s] impossible de récupérer la page %s."%(s, p))
- return None
-
- res = conn.getresponse()
- data = res.read()
-
- conn.close()
- return (res.status, data)
-
def station_status(station):
"""Gets available and free status of a given station"""
- (st, page) = getPage(CONF.getNode("server")["ip"], CONF.getNode("server")["url"] + station)
- if st == http.client.OK:
- response = parseString(page)
- available = response.documentElement.getElementsByTagName("available")
- if len(available) > 0:
- available = int(available[0].childNodes[0].nodeValue)
+ response = web.getXML(CONF.getNode("server")["url"] + station)
+ if response is not None:
+ available = response.getNode("available").getContent()
+ if available is not None and len(available) > 0:
+ available = int(available)
else:
available = 0
- free = response.documentElement.getElementsByTagName("free")
- if len(free) > 0:
- free = int(free[0].childNodes[0].nodeValue)
+ free = response.getNode("free").getContent()
+ if free is not None and len(free) > 0:
+ free = int(free)
else:
free = 0
return (available, free)
diff --git a/modules/velib.xml b/modules/velib.xml
index 112fa7f..61d8a5e 100644
--- a/modules/velib.xml
+++ b/modules/velib.xml
@@ -1,5 +1,5 @@
-
+
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tools/web.py b/tools/web.py
index 5f3f4d3..bec4bf8 100644
--- a/tools/web.py
+++ b/tools/web.py
@@ -18,18 +18,30 @@
import http.client
import re
+import socket
from urllib.parse import quote
+import xmlparser
+
+# Parse URL
+
def parseURL(url):
"""Separate protocol, domain, port and page request"""
res = re.match("^(([^:]+)://)?([^:/]+)(:([0-9]{1,5}))?(.*)$", url)
if res is not None:
- port = res.group(5)
- if port is None and res.group(2) is not None:
+ if res.group(5) is not None:
+ port = int(res.group(5))
+ elif res.group(2) is not None:
if res.group(2) == "http":
port = 80
elif res.group(2) == "https":
port = 443
+ else:
+ print (" WARNING: unknown protocol %s"
+ % res.group(2))
+ port = 0
+ else:
+ port = 0
return (res.group(2), res.group(3), port, res.group(6))
else:
return (None, None, None, None)
@@ -40,38 +52,67 @@ def getDomain(url):
return domain
def getProtocol(url):
- """Return the domain of a given URL"""
+ """Return the protocol of a given URL"""
(protocol, domain, port, page) = parseURL(url)
return protocol
-def getURL(url):
+def getPort(url):
+ """Return the port of a given URL"""
+ (protocol, domain, port, page) = parseURL(url)
+ return port
+
+def getRequest(url):
+ """Return the page request of a given URL"""
+ (protocol, domain, port, page) = parseURL(url)
+ return page
+
+
+# Get real pages
+
+def getURLContent(url, timeout=15):
"""Return page content corresponding to URL or None if any error occurs"""
- conn = http.client.HTTPConnection("api.duckduckgo.com", timeout=5)
+ (protocol, domain, port, page) = parseURL(url)
+ if port == 0: port = 80
+ conn = http.client.HTTPConnection(domain, port=port, timeout=timeout)
try:
- conn.request("GET", "/?q=%s&format=xml" % quote(terms))
+ conn.request("GET", page, None, {"User-agent": "Nemubot v3"})
except socket.gaierror:
- print ("impossible de récupérer la page %s."%(p))
- return (http.client.INTERNAL_SERVER_ERROR, None)
+ print (" Unable to receive page %s from %s on %d."
+ % (page, domain, port))
+ return None
res = conn.getresponse()
data = res.read()
conn.close()
- return (res.status, data)
+ if res.status == http.client.OK or res.status == http.client.SEE_OTHER:
+ return data
+ #TODO: follow redirections
+ else:
+ return None
+
+def getXML(url, timeout=15):
+ """Get content page and return XML parsed content"""
+ cnt = getURLContent(url, timeout)
+ if cnt is None:
+ return None
+ else:
+ return xmlparser.parse_string(cnt)
+
+# Other utils
+
+def striphtml(data):
+ """Remove HTML tags from text"""
+ p = re.compile(r'<.*?>')
+ return p.sub('', data).replace("(", "/(").replace(")", ")/").replace("&quot;", "\"")
+
+
+# Tests when called alone
if __name__ == "__main__":
- content1 = ""
- with open("rss.php.1", "r") as f:
- for line in f:
- content1 += line
- content2 = ""
- with open("rss.php", "r") as f:
- for line in f:
- content2 += line
- a = Atom (content1)
- print (a.updated)
- b = Atom (content2)
- print (b.updated)
-
- diff = a.diff (b)
- print (diff)
+ print(parseURL("www.nemunai.re"))
+ print(parseURL("www.nemunai.re/?p0m"))
+ print(parseURL("http://www.nemunai.re/?p0m"))
+ print(parseURL("http://www.nemunai.re:42/?p0m"))
+ print(parseURL("www.nemunai.re:42/?p0m"))
+ print(parseURL("http://www.nemunai.re/?p0m"))