From 05a148fef40acd4a744abef4d9ae8cca00841e53 Mon Sep 17 00:00:00 2001 From: nemunaire Date: Tue, 24 Jun 2014 18:07:28 +0200 Subject: [PATCH] Web tool now handles HTTPS connections, content is decoded following given charset header --- modules/networking.py | 6 +++++- tools/web.py | 24 +++++++++++++++++++++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/modules/networking.py b/modules/networking.py index a6ae68d..0297076 100644 --- a/modules/networking.py +++ b/modules/networking.py @@ -40,11 +40,13 @@ def cmd_curl(msg): req = web.getURLContent(" ".join(msg.cmds[1:])) if req is not None: res = Response(msg.sender, channel=msg.channel) - for m in req.decode().split("\n"): + for m in req.split("\n"): res.append_message(m) return res else: return Response(msg.sender, "Une erreur est survenue lors de l'accès à cette URL", channel=msg.channel) + except socket.timeout: + return Response(msg.sender, "le délais d'attente a été dépassé durant l'accès à %s" % msg.cmds[1:], channel=msg.channel, nick=msg.nick) except socket.error as e: return Response(msg.sender, e.strerror, channel=msg.channel) else: @@ -117,6 +119,8 @@ def cmd_whois(msg): try: req = urllib.request.Request("http://www.whoisxmlapi.com/whoisserver/WhoisService?rid=1&domainName=%s&outputFormat=json&userName=%s&password=%s" % (urllib.parse.quote(dom), urllib.parse.quote(CONF.getNode("whoisxmlapi")["username"]), urllib.parse.quote(CONF.getNode("whoisxmlapi")["password"])), headers={ 'User-Agent' : "nemubot v3" }) raw = urllib.request.urlopen(req, timeout=10) + except socket.timeout: + raise IRCException("Sorry, the request has timed out.") except urllib.error.HTTPError as e: raise IRCException("HTTP error occurs: %s %s" % (e.code, e.reason)) diff --git a/tools/web.py b/tools/web.py index b0bf2e3..a8a5c3d 100644 --- a/tools/web.py +++ b/tools/web.py @@ -61,7 +61,17 @@ def getPassword(url): def getURLContent(url, timeout=15): """Return page content corresponding to URL or None if any error occurs""" o = urlparse(url) - conn = http.client.HTTPConnection(o.netloc, port=o.port, timeout=timeout) + if o.netloc == "": + o = urlparse("http://" + url) + + if o.scheme == "http": + conn = http.client.HTTPConnection(o.netloc, port=o.port, timeout=timeout) + elif o.scheme == "https": + conn = http.client.HTTPSConnection(o.netloc, port=o.port, timeout=timeout) + elif o.scheme is None or o.scheme == "": + conn = http.client.HTTPConnection(o.netloc, port=80, timeout=timeout) + else: + return None try: if o.query != '': conn.request("GET", o.path + "?" + o.query, None, {"User-agent": "Nemubot v3"}) @@ -70,8 +80,8 @@ def getURLContent(url, timeout=15): except socket.timeout: return None except socket.gaierror: - print (" Unable to receive page %s from %s on %d." - % (o.path, o.netloc, o.port)) + print (" Unable to receive page %s on %s from %s." + % (o.path, o.netloc, url)) return None try: @@ -83,6 +93,14 @@ def getURLContent(url, timeout=15): return None data = res.read(size) + + # Decode content + charset = res.getheader("Content-Type").split(";") + if len(charset) > 1: + for c in charset: + ch = c.split("=") + if ch[0].strip().lower() == "charset" and len(ch) > 1: + data = data.decode(ch[1]) except http.client.BadStatusLine: return None finally: