tools/web: add a URL normalizer function

2015-10-11 17:04:21 +02:00 · 2015-10-11 17:04:21 +02:00 · 20105e7d98
commit 20105e7d98
parent 7102e08000
3 changed files with 15 additions and 16 deletions
--- a/modules/networking/isup.py
+++ b/modules/networking/isup.py
@@ -10,8 +10,6 @@ def isup(url):
    """
    o = urllib.parse.urlparse(url, "http")
    if o.netloc == "":
        o = urllib.parse.urlparse("http://" + url)
    if o.netloc != "":
        isup = getJSON("http://isitup.org/%s.json" % o.netloc)
        if isup is not None and "status_code" in isup and isup["status_code"] == 1:
--- a/modules/networking/w3c.py
+++ b/modules/networking/w3c.py
@@ -13,9 +13,7 @@ def validator(url):
    o = urllib.parse.urlparse(url, "http")
    if o.netloc == "":
-        o = urllib.parse.urlparse("http://" + url)
+        raise IRCException("Indicate a valid URL!")
    if o.netloc == "":
        raise IRCException("Indiquer une URL valide !")
    try:
        req = urllib.request.Request("http://validator.w3.org/check?uri=%s&output=json" % (urllib.parse.quote(o.geturl())), headers={ 'User-Agent' : "Nemubot v%s" % __version__})
--- a/nemubot/tools/web.py
+++ b/nemubot/tools/web.py
@@ -16,7 +16,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urlsplit, urlunsplit
 from nemubot.exception import IRCException
@@ -24,38 +24,43 @@ from nemubot.exception import IRCException
 def isURL(url):
    """Return True if the URL can be parsed"""
    o = urlparse(url)
-    return o.scheme == "" and o.netloc == "" and o.path == ""
+    return o.netloc == "" and o.path == ""
 def getNormalizedURL(url):
    """Split the given URL and reassemble it into a normalized string.

    The URL is split with "http" as the default scheme, so that scheme is
    used when the input does not carry one of its own.
    """
    parts = urlsplit(url, "http")
    return urlunsplit(parts)
 def getScheme(url):
    """Return the protocol of a given URL"""
-    o = urlparse(url)
+    o = urlparse(url, "http")
    return o.scheme
 def getHost(url):
    """Return the domain of a given URL"""
-    return urlparse(url).hostname
+    return urlparse(url, "http").hostname
 def getPort(url):
    """Return the port of a given URL"""
-    return urlparse(url).port
+    return urlparse(url, "http").port
 def getPath(url):
    """Return the page request of a given URL"""
-    return urlparse(url).path
+    return urlparse(url, "http").path
 def getUser(url):
    """Return the page request of a given URL"""
-    return urlparse(url).username
+    return urlparse(url, "http").username
 def getPassword(url):
    """Return the page request of a given URL"""
-    return urlparse(url).password
+    return urlparse(url, "http").password
 # Get real pages
@@ -69,9 +74,7 @@ def getURLContent(url, body=None, timeout=7, header=None):
    timeout -- maximum number of seconds to wait before returning an exception
    """
-    o = urlparse(url)
+    o = urlparse(url, "http")
    if o.netloc == "":
        o = urlparse("http://" + url)
    import http.client