tools/web: factorize getNormalizedURL
This commit is contained in:
parent
76ec0d26b4
commit
55e6550cb1
@ -34,7 +34,7 @@ PROVIDERS = {
|
||||
}
|
||||
DEFAULT_PROVIDER = "framalink"
|
||||
|
||||
PROVIDERS_NETLOC = [urlparse(url, "http").netloc for f, url in PROVIDERS.values()]
|
||||
PROVIDERS_NETLOC = [urlparse(web.getNormalizedURL(url), "http").netloc for f, url in PROVIDERS.values()]
|
||||
|
||||
# LOADING #############################################################
|
||||
|
||||
@ -82,7 +82,7 @@ def parseresponse(msg):
|
||||
if hasattr(msg, "text") and msg.text:
|
||||
urls = re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ :]+)", msg.text)
|
||||
for url in urls:
|
||||
o = urlparse(url, "http")
|
||||
o = urlparse(web._getNormalizedURL(url), "http")
|
||||
|
||||
# Skip short URLs
|
||||
if o.netloc == "" or o.netloc in PROVIDERS or len(o.netloc) + len(o.path) < 17:
|
||||
@ -118,7 +118,7 @@ def cmd_reduceurl(msg):
|
||||
|
||||
res = list()
|
||||
for url in minify:
|
||||
o = urlparse(url, "http")
|
||||
o = urlparse(web.getNormalizedURL(url), "http")
|
||||
minief_url = reduce(url)
|
||||
if o.netloc == "":
|
||||
res.append(gen_response(minief_url, msg, o.scheme))
|
||||
|
@ -1,6 +1,6 @@
|
||||
import urllib
|
||||
|
||||
from nemubot.tools.web import getJSON
|
||||
from nemubot.tools.web import getNormalizedURL, getJSON
|
||||
|
||||
def isup(url):
|
||||
"""Determine if the given URL is up or not
|
||||
@ -9,7 +9,7 @@ def isup(url):
|
||||
url -- the URL to check
|
||||
"""
|
||||
|
||||
o = urllib.parse.urlparse(url, "http")
|
||||
o = urllib.parse.urlparse(getNormalizedURL(url), "http")
|
||||
if o.netloc != "":
|
||||
isup = getJSON("http://isitup.org/%s.json" % o.netloc)
|
||||
if isup is not None and "status_code" in isup and isup["status_code"] == 1:
|
||||
|
@ -21,7 +21,7 @@ def headers(url):
|
||||
url -- the page URL to get header
|
||||
"""
|
||||
|
||||
o = urllib.parse.urlparse(url, "http")
|
||||
o = urllib.parse.urlparse(web.getNormalizedURL(url), "http")
|
||||
if o.netloc == "":
|
||||
raise IRCException("invalid URL")
|
||||
if o.scheme == "http":
|
||||
|
@ -3,6 +3,7 @@ import urllib
|
||||
|
||||
from nemubot import __version__
|
||||
from nemubot.exception import IRCException
|
||||
from nemubot.tools.web import getNormalizedURL
|
||||
|
||||
def validator(url):
|
||||
"""Run the w3c validator on the given URL
|
||||
@ -11,7 +12,7 @@ def validator(url):
|
||||
url -- the URL to validate
|
||||
"""
|
||||
|
||||
o = urllib.parse.urlparse(url, "http")
|
||||
o = urllib.parse.urlparse(getNormalizedURL(url), "http")
|
||||
if o.netloc == "":
|
||||
raise IRCException("Indicate a valid URL!")
|
||||
|
||||
|
@ -8,10 +8,10 @@ from urllib.parse import urlparse
|
||||
from nemubot.event import ModuleEvent
|
||||
from nemubot.exception import IRCException
|
||||
from nemubot.hooks import hook
|
||||
from nemubot.tools.web import getNormalizedURL
|
||||
from nemubot.tools.xmlparser.node import ModuleState
|
||||
|
||||
logger = logging.getLogger("nemubot.module.networking.watchWebsite")
|
||||
nemubotversion = 3.4
|
||||
|
||||
from more import Response
|
||||
|
||||
@ -56,7 +56,7 @@ def del_site(url, nick, channel, frm_owner):
|
||||
url -- URL to unwatch
|
||||
"""
|
||||
|
||||
o = urlparse(url, "http")
|
||||
o = urlparse(getNormalizedURL(url), "http")
|
||||
if o.scheme != "" and url in DATAS.index:
|
||||
site = DATAS.index[url]
|
||||
for a in site.getNodes("alert"):
|
||||
@ -80,7 +80,7 @@ def add_site(url, nick, channel, server, diffType="diff"):
|
||||
url -- URL to watch
|
||||
"""
|
||||
|
||||
o = urlparse(url, "http")
|
||||
o = urlparse(getNormalizedURL(url), "http")
|
||||
if o.netloc == "":
|
||||
raise IRCException("sorry, I can't watch this URL :(")
|
||||
|
||||
@ -210,7 +210,7 @@ def start_watching(site, offset=0):
|
||||
offset -- offset time to delay the launch of the first check
|
||||
"""
|
||||
|
||||
o = urlparse(site["url"], "http")
|
||||
o = urlparse(getNormalizedURL(site["url"]), "http")
|
||||
#print_debug("Add %s event for site: %s" % (site["type"], o.netloc))
|
||||
|
||||
try:
|
||||
|
@ -3,7 +3,7 @@ import re, json, subprocess
|
||||
|
||||
from nemubot.exception import IRCException
|
||||
from nemubot.hooks import hook
|
||||
from nemubot.tools.web import getURLContent
|
||||
from nemubot.tools.web import _getNormalizedURL, getURLContent
|
||||
from more import Response
|
||||
|
||||
"""Get information of youtube videos"""
|
||||
@ -85,7 +85,7 @@ def parseresponse(msg):
|
||||
if hasattr(msg, "text") and msg.text:
|
||||
urls = re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ :]+)", msg.text)
|
||||
for url in urls:
|
||||
o = urlparse(url)
|
||||
o = urlparse(_getNormalizedURL(url))
|
||||
if o.scheme != "":
|
||||
if o.netloc == "" and len(o.path) < 10:
|
||||
continue
|
||||
|
@ -23,13 +23,17 @@ from nemubot.exception import IRCException
|
||||
|
||||
def isURL(url):
    """Return True if the URL can be parsed

    Argument:
    url -- the candidate URL, with or without a scheme
    """

    # Normalize first so that scheme-less input such as "example.com" is
    # parsed with a network location instead of being read as a bare path.
    o = urlparse(_getNormalizedURL(url))

    # The URL is usable as soon as the parser found a host or a path; the
    # former test (both empty) answered the opposite of what is documented.
    return o.netloc != "" or o.path != ""
|
||||
|
||||
|
||||
def _getNormalizedURL(url):
|
||||
"""Return a light normalized form for the given URL"""
|
||||
return url if "//" in url else "//" + url
|
||||
|
||||
def getNormalizedURL(url):
    """Return a normalized form for the given URL

    The URL is first passed through _getNormalizedURL, then split with
    "http" as the default scheme and reassembled.

    Argument:
    url -- the URL to normalize, with or without a scheme
    """

    # Only the normalized variant is kept: splitting the raw string would
    # read a scheme-less "example.com/page" entirely as a path.
    return urlunsplit(urlsplit(_getNormalizedURL(url), "http"))
|
||||
|
||||
|
||||
def getScheme(url):
|
||||
@ -40,27 +44,27 @@ def getScheme(url):
|
||||
|
||||
def getHost(url):
    """Return the domain of a given URL

    Argument:
    url -- the URL to inspect, with or without a scheme
    """

    # Parse the normalized form so that a scheme-less URL still exposes its
    # host; "http" is the scheme assumed when none is given.
    return urlparse(_getNormalizedURL(url), "http").hostname
|
||||
|
||||
|
||||
def getPort(url):
    """Return the port of a given URL

    Argument:
    url -- the URL to inspect, with or without a scheme
    """

    # Parse the normalized form so that "host:port" given without a scheme
    # is read as a network location and not as "scheme:path".
    return urlparse(_getNormalizedURL(url), "http").port
|
||||
|
||||
|
||||
def getPath(url):
    """Return the page request of a given URL

    Argument:
    url -- the URL to inspect, with or without a scheme
    """

    # Parse the normalized form so that the host of a scheme-less URL is not
    # reported as part of the path.
    return urlparse(_getNormalizedURL(url), "http").path
|
||||
|
||||
|
||||
def getUser(url):
    """Return the user name of a given URL

    Argument:
    url -- the URL to inspect, with or without a scheme
    """

    # Parse the normalized form so that "user@host" given without a scheme
    # is read as a network location.
    return urlparse(_getNormalizedURL(url), "http").username
|
||||
|
||||
|
||||
def getPassword(url):
    """Return the password of a given URL

    Argument:
    url -- the URL to inspect, with or without a scheme
    """

    # Parse the normalized form so that "user:password@host" given without a
    # scheme is read as a network location.
    return urlparse(_getNormalizedURL(url), "http").password
|
||||
|
||||
|
||||
# Get real pages
|
||||
@ -74,7 +78,7 @@ def getURLContent(url, body=None, timeout=7, header=None):
|
||||
timeout -- maximum number of seconds to wait before returning an exception
|
||||
"""
|
||||
|
||||
o = urlparse(url, "http")
|
||||
o = urlparse(_getNormalizedURL(url), "http")
|
||||
|
||||
import http.client
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user