tools/web: add a URL normalizer function
This commit is contained in:
parent
7102e08000
commit
20105e7d98
@ -10,8 +10,6 @@ def isup(url):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
o = urllib.parse.urlparse(url, "http")
|
o = urllib.parse.urlparse(url, "http")
|
||||||
if o.netloc == "":
|
|
||||||
o = urllib.parse.urlparse("http://" + url)
|
|
||||||
if o.netloc != "":
|
if o.netloc != "":
|
||||||
isup = getJSON("http://isitup.org/%s.json" % o.netloc)
|
isup = getJSON("http://isitup.org/%s.json" % o.netloc)
|
||||||
if isup is not None and "status_code" in isup and isup["status_code"] == 1:
|
if isup is not None and "status_code" in isup and isup["status_code"] == 1:
|
||||||
|
@ -13,9 +13,7 @@ def validator(url):
|
|||||||
|
|
||||||
o = urllib.parse.urlparse(url, "http")
|
o = urllib.parse.urlparse(url, "http")
|
||||||
if o.netloc == "":
|
if o.netloc == "":
|
||||||
o = urllib.parse.urlparse("http://" + url)
|
raise IRCException("Indicate a valid URL!")
|
||||||
if o.netloc == "":
|
|
||||||
raise IRCException("Indiquer une URL valide !")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
req = urllib.request.Request("http://validator.w3.org/check?uri=%s&output=json" % (urllib.parse.quote(o.geturl())), headers={ 'User-Agent' : "Nemubot v%s" % __version__})
|
req = urllib.request.Request("http://validator.w3.org/check?uri=%s&output=json" % (urllib.parse.quote(o.geturl())), headers={ 'User-Agent' : "Nemubot v%s" % __version__})
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse, urlsplit, urlunsplit
|
||||||
|
|
||||||
from nemubot.exception import IRCException
|
from nemubot.exception import IRCException
|
||||||
|
|
||||||
@ -24,38 +24,43 @@ from nemubot.exception import IRCException
|
|||||||
def isURL(url):
|
def isURL(url):
|
||||||
"""Return True if the URL can be parsed"""
|
"""Return True if the URL can be parsed"""
|
||||||
o = urlparse(url)
|
o = urlparse(url)
|
||||||
return o.scheme == "" and o.netloc == "" and o.path == ""
|
return o.netloc == "" and o.path == ""
|
||||||
|
|
||||||
|
|
||||||
|
def getNormalizedURL(url):
|
||||||
|
"""Return a normalized form for the given URL"""
|
||||||
|
return urlunsplit(urlsplit(url, "http"))
|
||||||
|
|
||||||
|
|
||||||
def getScheme(url):
|
def getScheme(url):
|
||||||
"""Return the protocol of a given URL"""
|
"""Return the protocol of a given URL"""
|
||||||
o = urlparse(url)
|
o = urlparse(url, "http")
|
||||||
return o.scheme
|
return o.scheme
|
||||||
|
|
||||||
|
|
||||||
def getHost(url):
|
def getHost(url):
|
||||||
"""Return the domain of a given URL"""
|
"""Return the domain of a given URL"""
|
||||||
return urlparse(url).hostname
|
return urlparse(url, "http").hostname
|
||||||
|
|
||||||
|
|
||||||
def getPort(url):
|
def getPort(url):
|
||||||
"""Return the port of a given URL"""
|
"""Return the port of a given URL"""
|
||||||
return urlparse(url).port
|
return urlparse(url, "http").port
|
||||||
|
|
||||||
|
|
||||||
def getPath(url):
|
def getPath(url):
|
||||||
"""Return the page request of a given URL"""
|
"""Return the page request of a given URL"""
|
||||||
return urlparse(url).path
|
return urlparse(url, "http").path
|
||||||
|
|
||||||
|
|
||||||
def getUser(url):
|
def getUser(url):
|
||||||
"""Return the username of a given URL"""
|
"""Return the username of a given URL"""
|
||||||
return urlparse(url).username
|
return urlparse(url, "http").username
|
||||||
|
|
||||||
|
|
||||||
def getPassword(url):
|
def getPassword(url):
|
||||||
"""Return the password of a given URL"""
|
"""Return the password of a given URL"""
|
||||||
return urlparse(url).password
|
return urlparse(url, "http").password
|
||||||
|
|
||||||
|
|
||||||
# Get real pages
|
# Get real pages
|
||||||
@ -69,9 +74,7 @@ def getURLContent(url, body=None, timeout=7, header=None):
|
|||||||
timeout -- maximum number of seconds to wait before returning an exception
|
timeout -- maximum number of seconds to wait before returning an exception
|
||||||
"""
|
"""
|
||||||
|
|
||||||
o = urlparse(url)
|
o = urlparse(url, "http")
|
||||||
if o.netloc == "":
|
|
||||||
o = urlparse("http://" + url)
|
|
||||||
|
|
||||||
import http.client
|
import http.client
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user