tools/web: add a URL normalizer function

This commit is contained in:
nemunaire 2015-10-11 17:04:21 +02:00
parent 7102e08000
commit 20105e7d98
3 changed files with 15 additions and 16 deletions

View File

@@ -10,8 +10,6 @@ def isup(url):
"""
o = urllib.parse.urlparse(url, "http")
if o.netloc == "":
o = urllib.parse.urlparse("http://" + url)
if o.netloc != "":
isup = getJSON("http://isitup.org/%s.json" % o.netloc)
if isup is not None and "status_code" in isup and isup["status_code"] == 1:

View File

@@ -13,9 +13,7 @@ def validator(url):
o = urllib.parse.urlparse(url, "http")
if o.netloc == "":
o = urllib.parse.urlparse("http://" + url)
if o.netloc == "":
raise IRCException("Indiquer une URL valide !")
raise IRCException("Indicate a valid URL!")
try:
req = urllib.request.Request("http://validator.w3.org/check?uri=%s&output=json" % (urllib.parse.quote(o.geturl())), headers={ 'User-Agent' : "Nemubot v%s" % __version__})

View File

@@ -16,7 +16,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from urllib.parse import urlparse
from urllib.parse import urlparse, urlsplit, urlunsplit
from nemubot.exception import IRCException
@@ -24,38 +24,43 @@ from nemubot.exception import IRCException
def isURL(url):
    """Return True if the given URL can be parsed.

    A URL is considered parseable when urlparse extracts at least a
    network location or a path from it.

    Argument:
    url -- the string to check
    """
    o = urlparse(url)
    # The original returned True only when BOTH netloc and path were
    # empty, i.e. exactly when nothing could be parsed — the inverse of
    # the documented contract. Return True when something was parsed.
    return o.netloc != "" or o.path != ""
def getNormalizedURL(url):
    """Return a normalized form for the given URL

    Argument:
    url -- the URL to normalize; "http" is assumed when no scheme is given
    """
    parts = urlsplit(url, "http")
    return urlunsplit(parts)
def getScheme(url):
    """Return the protocol of a given URL

    Defaults to "http" when the URL carries no explicit scheme.
    """
    return urlparse(url, "http").scheme
def getHost(url):
    """Return the domain of a given URL"""
    components = urlparse(url, "http")
    return components.hostname
def getPort(url):
    """Return the port of a given URL"""
    components = urlparse(url, "http")
    return components.port
def getPath(url):
    """Return the page request of a given URL"""
    components = urlparse(url, "http")
    return components.path
def getUser(url):
    """Return the username of a given URL

    The original docstring said "page request" — copy-pasted from
    getPath — while the code returns the userinfo username.
    """
    return urlparse(url, "http").username
def getPassword(url):
    """Return the password of a given URL

    The original docstring said "page request" — copy-pasted from
    getPath — while the code returns the userinfo password.
    """
    return urlparse(url, "http").password
# Get real pages
@@ -69,9 +74,7 @@ def getURLContent(url, body=None, timeout=7, header=None):
timeout -- maximum number of seconds to wait before returning an exception
"""
o = urlparse(url)
if o.netloc == "":
o = urlparse("http://" + url)
o = urlparse(url, "http")
import http.client