tools/web: add a URL normalizer function
This commit is contained in:
parent
7102e08000
commit
20105e7d98
@ -10,8 +10,6 @@ def isup(url):
|
||||
"""
|
||||
|
||||
o = urllib.parse.urlparse(url, "http")
|
||||
if o.netloc == "":
|
||||
o = urllib.parse.urlparse("http://" + url)
|
||||
if o.netloc != "":
|
||||
isup = getJSON("http://isitup.org/%s.json" % o.netloc)
|
||||
if isup is not None and "status_code" in isup and isup["status_code"] == 1:
|
||||
|
@ -13,9 +13,7 @@ def validator(url):
|
||||
|
||||
o = urllib.parse.urlparse(url, "http")
|
||||
if o.netloc == "":
|
||||
o = urllib.parse.urlparse("http://" + url)
|
||||
if o.netloc == "":
|
||||
raise IRCException("Indiquer une URL valide !")
|
||||
raise IRCException("Indicate a valid URL!")
|
||||
|
||||
try:
|
||||
req = urllib.request.Request("http://validator.w3.org/check?uri=%s&output=json" % (urllib.parse.quote(o.geturl())), headers={ 'User-Agent' : "Nemubot v%s" % __version__})
|
||||
|
@ -16,7 +16,7 @@
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlparse, urlsplit, urlunsplit
|
||||
|
||||
from nemubot.exception import IRCException
|
||||
|
||||
@ -24,38 +24,43 @@ from nemubot.exception import IRCException
|
||||
def isURL(url):
    """Return True if the URL can be parsed.

    The string is considered parsed when urlparse() extracts a network
    location or a path from it.

    Argument:
    url -- the string to check
    """

    o = urlparse(url)
    # The former test (netloc == "" and path == "") was True exactly when
    # nothing had been extracted, i.e. the opposite of the documented contract.
    return o.netloc != "" or o.path != ""
|
||||
|
||||
|
||||
def getNormalizedURL(url):
    """Return a normalized form for the given URL.

    The "http" scheme is used when the URL does not carry one.

    Argument:
    url -- the URL to normalize, with or without a scheme
    """

    bare = urlsplit(url)
    if bare.scheme == "" and bare.netloc == "":
        # urllib only recognizes a network location after "//"; without it,
        # "example.com/x" is parsed as a bare path and the rebuilt URL has an
        # empty host.
        url = "//" + url
    return urlunsplit(urlsplit(url, "http"))
|
||||
|
||||
|
||||
def getScheme(url):
    """Return the protocol of a given URL.

    Argument:
    url -- the URL to inspect; "http" is returned when it has no scheme
    """

    return urlparse(url, "http").scheme
|
||||
|
||||
|
||||
def getHost(url):
    """Return the domain of a given URL.

    Argument:
    url -- the URL to inspect, with or without a scheme

    Returns None when no host can be extracted.
    """

    bare = urlparse(url)
    if bare.scheme == "" and bare.netloc == "":
        # urllib only recognizes a network location after "//"; without it,
        # "example.com/x" would be parsed as a path and the host would be lost.
        url = "//" + url
    return urlparse(url, "http").hostname
|
||||
|
||||
|
||||
def getPort(url):
    """Return the port of a given URL.

    Argument:
    url -- the URL to inspect, with or without a scheme

    Returns None when the URL does not specify a port.
    """

    bare = urlparse(url)
    if bare.scheme == "" and bare.netloc == "":
        # urllib only recognizes a network location after "//"; add it so a
        # scheme-less input is not parsed as a bare path.
        url = "//" + url
    return urlparse(url, "http").port
|
||||
|
||||
|
||||
def getPath(url):
    """Return the page request of a given URL.

    Argument:
    url -- the URL to inspect, with or without a scheme
    """

    bare = urlparse(url)
    if bare.scheme == "" and bare.netloc == "":
        # urllib only recognizes a network location after "//"; without it,
        # the host of "example.com/x" would be returned as part of the path.
        url = "//" + url
    return urlparse(url, "http").path
|
||||
|
||||
|
||||
def getUser(url):
    """Return the user name of a given URL.

    Argument:
    url -- the URL to inspect, with or without a scheme

    Returns None when the URL carries no user information.
    """

    bare = urlparse(url)
    if bare.scheme == "" and bare.netloc == "":
        # urllib only recognizes a network location (and thus the user part)
        # after "//"; add it so a scheme-less input is not parsed as a path.
        url = "//" + url
    return urlparse(url, "http").username
|
||||
|
||||
|
||||
def getPassword(url):
    """Return the password of a given URL.

    Argument:
    url -- the URL to inspect, with or without a scheme

    Returns None when the URL carries no password.
    """

    bare = urlparse(url)
    if bare.scheme == "" and bare.netloc == "":
        # urllib only recognizes a network location (and thus the credentials)
        # after "//"; add it so a scheme-less input is not parsed as a path.
        url = "//" + url
    return urlparse(url, "http").password
|
||||
|
||||
|
||||
# Get real pages
|
||||
@ -69,9 +74,7 @@ def getURLContent(url, body=None, timeout=7, header=None):
|
||||
timeout -- maximum number of seconds to wait before returning an exception
|
||||
"""
|
||||
|
||||
o = urlparse(url)
|
||||
if o.netloc == "":
|
||||
o = urlparse("http://" + url)
|
||||
o = urlparse(url, "http")
|
||||
|
||||
import http.client
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user