diff --git a/modules/framalink.py b/modules/framalink.py
index 8b17cd8..3ed1214 100644
--- a/modules/framalink.py
+++ b/modules/framalink.py
@@ -34,7 +34,7 @@ PROVIDERS = {
 }
 
 DEFAULT_PROVIDER = "framalink"
-PROVIDERS_NETLOC = [urlparse(url, "http").netloc for f, url in PROVIDERS.values()]
+PROVIDERS_NETLOC = [urlparse(web.getNormalizedURL(url), "http").netloc for f, url in PROVIDERS.values()]
 
 
 # LOADING #############################################################
@@ -82,7 +82,7 @@ def parseresponse(msg):
     if hasattr(msg, "text") and msg.text:
         urls = re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ :]+)", msg.text)
         for url in urls:
-            o = urlparse(url, "http")
+            o = urlparse(web._getNormalizedURL(url), "http")
 
             # Skip short URLs
             if o.netloc == "" or o.netloc in PROVIDERS or len(o.netloc) + len(o.path) < 17:
@@ -118,7 +118,7 @@ def cmd_reduceurl(msg):
     res = list()
 
     for url in minify:
-        o = urlparse(url, "http")
+        o = urlparse(web.getNormalizedURL(url), "http")
         minief_url = reduce(url)
         if o.netloc == "":
             res.append(gen_response(minief_url, msg, o.scheme))
diff --git a/modules/networking/isup.py b/modules/networking/isup.py
index a666db1..c518900 100644
--- a/modules/networking/isup.py
+++ b/modules/networking/isup.py
@@ -1,6 +1,6 @@
 import urllib
 
-from nemubot.tools.web import getJSON
+from nemubot.tools.web import getNormalizedURL, getJSON
 
 def isup(url):
     """Determine if the given URL is up or not
@@ -9,7 +9,7 @@ def isup(url):
     url -- the URL to check
     """
 
-    o = urllib.parse.urlparse(url, "http")
+    o = urllib.parse.urlparse(getNormalizedURL(url), "http")
     if o.netloc != "":
         isup = getJSON("http://isitup.org/%s.json" % o.netloc)
         if isup is not None and "status_code" in isup and isup["status_code"] == 1:
diff --git a/modules/networking/page.py b/modules/networking/page.py
index 67ec8c7..6179e34 100644
--- a/modules/networking/page.py
+++ b/modules/networking/page.py
@@ -21,7 +21,7 @@ def headers(url):
     url -- the page URL to get header
     """
 
-    o = urllib.parse.urlparse(url, "http")
+    o = urllib.parse.urlparse(web.getNormalizedURL(url), "http")
     if o.netloc == "":
         raise IRCException("invalid URL")
     if o.scheme == "http":
diff --git a/modules/networking/w3c.py b/modules/networking/w3c.py
index c8331c6..3d920ef 100644
--- a/modules/networking/w3c.py
+++ b/modules/networking/w3c.py
@@ -3,6 +3,7 @@ import urllib
 
 from nemubot import __version__
 from nemubot.exception import IRCException
+from nemubot.tools.web import getNormalizedURL
 
 def validator(url):
     """Run the w3c validator on the given URL
@@ -11,7 +12,7 @@
     url -- the URL to validate
     """
 
-    o = urllib.parse.urlparse(url, "http")
+    o = urllib.parse.urlparse(getNormalizedURL(url), "http")
     if o.netloc == "":
         raise IRCException("Indicate a valid URL!")
 
diff --git a/modules/networking/watchWebsite.py b/modules/networking/watchWebsite.py
index 0c1f8d3..41ea7d3 100644
--- a/modules/networking/watchWebsite.py
+++ b/modules/networking/watchWebsite.py
@@ -8,10 +8,10 @@ from urllib.parse import urlparse
 from nemubot.event import ModuleEvent
 from nemubot.exception import IRCException
 from nemubot.hooks import hook
+from nemubot.tools.web import getNormalizedURL
 from nemubot.tools.xmlparser.node import ModuleState
 
 logger = logging.getLogger("nemubot.module.networking.watchWebsite")
 
-nemubotversion = 3.4
 
 from more import Response
@@ -56,7 +56,7 @@ def del_site(url, nick, channel, frm_owner):
     url -- URL to unwatch
     """
 
-    o = urlparse(url, "http")
+    o = urlparse(getNormalizedURL(url), "http")
     if o.scheme != "" and url in DATAS.index:
         site = DATAS.index[url]
         for a in site.getNodes("alert"):
@@ -80,7 +80,7 @@ def add_site(url, nick, channel, server, diffType="diff"):
     url -- URL to watch
     """
 
-    o = urlparse(url, "http")
+    o = urlparse(getNormalizedURL(url), "http")
     if o.netloc == "":
         raise IRCException("sorry, I can't watch this URL :(")
 
@@ -210,7 +210,7 @@ def start_watching(site, offset=0):
     offset -- offset time to delay the launch of the first check
     """
 
-    o = urlparse(site["url"], "http")
+    o = urlparse(getNormalizedURL(site["url"]), "http")
     #print_debug("Add %s event for site: %s" % (site["type"], o.netloc))
 
     try:
diff --git a/modules/youtube-title.py b/modules/youtube-title.py
index d4061b1..4bf115c 100644
--- a/modules/youtube-title.py
+++ b/modules/youtube-title.py
@@ -3,7 +3,7 @@ import re, json, subprocess
 from nemubot.exception import IRCException
 from nemubot.hooks import hook
-from nemubot.tools.web import getURLContent
+from nemubot.tools.web import _getNormalizedURL, getURLContent
 
 from more import Response
 
 """Get information of youtube videos"""
@@ -85,7 +85,7 @@ def parseresponse(msg):
     if hasattr(msg, "text") and msg.text:
         urls = re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ :]+)", msg.text)
         for url in urls:
-            o = urlparse(url)
+            o = urlparse(_getNormalizedURL(url))
             if o.scheme != "":
                 if o.netloc == "" and len(o.path) < 10:
                     continue
diff --git a/nemubot/tools/web.py b/nemubot/tools/web.py
index a730a9e..938e02d 100644
--- a/nemubot/tools/web.py
+++ b/nemubot/tools/web.py
@@ -23,13 +23,17 @@ from nemubot.exception import IRCException
 
 def isURL(url):
     """Return True if the URL can be parsed"""
-    o = urlparse(url)
+    o = urlparse(_getNormalizedURL(url))
     return o.netloc == "" and o.path == ""
 
 
+def _getNormalizedURL(url):
+    """Return a light normalized form for the given URL"""
+    return url if "//" in url else "//" + url
+
 def getNormalizedURL(url):
     """Return a normalized form for the given URL"""
-    return urlunsplit(urlsplit(url, "http"))
+    return urlunsplit(urlsplit(_getNormalizedURL(url), "http"))
 
 
 def getScheme(url):
@@ -40,27 +44,27 @@
 
 def getHost(url):
     """Return the domain of a given URL"""
-    return urlparse(url, "http").hostname
+    return urlparse(_getNormalizedURL(url), "http").hostname
 
 
 def getPort(url):
     """Return the port of a given URL"""
-    return urlparse(url, "http").port
+    return urlparse(_getNormalizedURL(url), "http").port
 
 
 def getPath(url):
     """Return the page request of a given URL"""
-    return urlparse(url, "http").path
+    return urlparse(_getNormalizedURL(url), "http").path
 
 
 def getUser(url):
     """Return the page request of a given URL"""
-    return urlparse(url, "http").username
+    return urlparse(_getNormalizedURL(url), "http").username
 
 
 def getPassword(url):
     """Return the page request of a given URL"""
-    return urlparse(url, "http").password
+    return urlparse(_getNormalizedURL(url), "http").password
 
 
 # Get real pages
@@ -74,7 +78,7 @@ def getURLContent(url, body=None, timeout=7, header=None):
     timeout -- maximum number of seconds to wait before returning an exception
     """
 
-    o = urlparse(url, "http")
+    o = urlparse(_getNormalizedURL(url), "http")
 
     import http.client