From 05a148fef40acd4a744abef4d9ae8cca00841e53 Mon Sep 17 00:00:00 2001
From: nemunaire <nemunaire@nemunai.re>
Date: Tue, 24 Jun 2014 18:07:28 +0200
Subject: [PATCH] Web tool now handles HTTPS connections; content is decoded
 using the charset given in the Content-Type header

---
 modules/networking.py |  6 +++++-
 tools/web.py          | 24 +++++++++++++++++++++---
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/modules/networking.py b/modules/networking.py
index a6ae68d..0297076 100644
--- a/modules/networking.py
+++ b/modules/networking.py
@@ -40,11 +40,13 @@ def cmd_curl(msg):
             req = web.getURLContent(" ".join(msg.cmds[1:]))
             if req is not None:
                 res = Response(msg.sender, channel=msg.channel)
-                for m in req.decode().split("\n"):
+                for m in req.split("\n"):
                     res.append_message(m)
                 return res
             else:
                 return Response(msg.sender, "Une erreur est survenue lors de l'accès à cette URL", channel=msg.channel)
+        except socket.timeout:
+            return Response(msg.sender, "le délais d'attente a été dépassé durant l'accès à %s" % msg.cmds[1:], channel=msg.channel, nick=msg.nick)
         except socket.error as e:
             return Response(msg.sender, e.strerror, channel=msg.channel)
     else:
@@ -117,6 +119,8 @@ def cmd_whois(msg):
     try:
         req = urllib.request.Request("http://www.whoisxmlapi.com/whoisserver/WhoisService?rid=1&domainName=%s&outputFormat=json&userName=%s&password=%s" % (urllib.parse.quote(dom), urllib.parse.quote(CONF.getNode("whoisxmlapi")["username"]), urllib.parse.quote(CONF.getNode("whoisxmlapi")["password"])), headers={ 'User-Agent' : "nemubot v3" })
         raw = urllib.request.urlopen(req, timeout=10)
+    except socket.timeout:
+        raise IRCException("Sorry, the request has timed out.")
     except urllib.error.HTTPError as e:
         raise IRCException("HTTP error occurs: %s %s" % (e.code, e.reason))
 
diff --git a/tools/web.py b/tools/web.py
index b0bf2e3..a8a5c3d 100644
--- a/tools/web.py
+++ b/tools/web.py
@@ -61,7 +61,17 @@ def getPassword(url):
 def getURLContent(url, timeout=15):
     """Return page content corresponding to URL or None if any error occurs"""
     o = urlparse(url)
-    conn = http.client.HTTPConnection(o.netloc, port=o.port, timeout=timeout)
+    if o.netloc == "":
+        o = urlparse("http://" + url)
+
+    if o.scheme == "http":
+        conn = http.client.HTTPConnection(o.netloc, port=o.port, timeout=timeout)
+    elif o.scheme == "https":
+        conn = http.client.HTTPSConnection(o.netloc, port=o.port, timeout=timeout)
+    elif o.scheme is None or o.scheme == "":
+        conn = http.client.HTTPConnection(o.netloc, port=80, timeout=timeout)
+    else:
+        return None
     try:
         if o.query != '':
             conn.request("GET", o.path + "?" + o.query, None, {"User-agent": "Nemubot v3"})
@@ -70,8 +80,8 @@ def getURLContent(url, timeout=15):
     except socket.timeout:
         return None
     except socket.gaierror:
-        print ("<tools.web> Unable to receive page %s from %s on %d."
-               % (o.path, o.netloc, o.port))
+        print ("<tools.web> Unable to receive page %s on %s from %s."
+               % (o.path, o.netloc, url))
         return None
 
     try:
@@ -83,6 +93,14 @@ def getURLContent(url, timeout=15):
             return None
 
         data = res.read(size)
+
+        # Decode content
+        charset = res.getheader("Content-Type").split(";")
+        if len(charset) > 1:
+            for c in charset:
+                ch = c.split("=")
+                if ch[0].strip().lower() == "charset" and len(ch) > 1:
+                    data = data.decode(ch[1])
     except http.client.BadStatusLine:
         return None
     finally: