tools/web: factorize getNormalizedURL
This commit is contained in:
parent
76ec0d26b4
commit
55e6550cb1
@ -34,7 +34,7 @@ PROVIDERS = {
|
|||||||
}
|
}
|
||||||
DEFAULT_PROVIDER = "framalink"
|
DEFAULT_PROVIDER = "framalink"
|
||||||
|
|
||||||
PROVIDERS_NETLOC = [urlparse(url, "http").netloc for f, url in PROVIDERS.values()]
|
PROVIDERS_NETLOC = [urlparse(web.getNormalizedURL(url), "http").netloc for f, url in PROVIDERS.values()]
|
||||||
|
|
||||||
# LOADING #############################################################
|
# LOADING #############################################################
|
||||||
|
|
||||||
@ -82,7 +82,7 @@ def parseresponse(msg):
|
|||||||
if hasattr(msg, "text") and msg.text:
|
if hasattr(msg, "text") and msg.text:
|
||||||
urls = re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ :]+)", msg.text)
|
urls = re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ :]+)", msg.text)
|
||||||
for url in urls:
|
for url in urls:
|
||||||
o = urlparse(url, "http")
|
o = urlparse(web._getNormalizedURL(url), "http")
|
||||||
|
|
||||||
# Skip short URLs
|
# Skip short URLs
|
||||||
if o.netloc == "" or o.netloc in PROVIDERS or len(o.netloc) + len(o.path) < 17:
|
if o.netloc == "" or o.netloc in PROVIDERS or len(o.netloc) + len(o.path) < 17:
|
||||||
@ -118,7 +118,7 @@ def cmd_reduceurl(msg):
|
|||||||
|
|
||||||
res = list()
|
res = list()
|
||||||
for url in minify:
|
for url in minify:
|
||||||
o = urlparse(url, "http")
|
o = urlparse(web.getNormalizedURL(url), "http")
|
||||||
minief_url = reduce(url)
|
minief_url = reduce(url)
|
||||||
if o.netloc == "":
|
if o.netloc == "":
|
||||||
res.append(gen_response(minief_url, msg, o.scheme))
|
res.append(gen_response(minief_url, msg, o.scheme))
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
from nemubot.tools.web import getJSON
|
from nemubot.tools.web import getNormalizedURL, getJSON
|
||||||
|
|
||||||
def isup(url):
|
def isup(url):
|
||||||
"""Determine if the given URL is up or not
|
"""Determine if the given URL is up or not
|
||||||
@ -9,7 +9,7 @@ def isup(url):
|
|||||||
url -- the URL to check
|
url -- the URL to check
|
||||||
"""
|
"""
|
||||||
|
|
||||||
o = urllib.parse.urlparse(url, "http")
|
o = urllib.parse.urlparse(getNormalizedURL(url), "http")
|
||||||
if o.netloc != "":
|
if o.netloc != "":
|
||||||
isup = getJSON("http://isitup.org/%s.json" % o.netloc)
|
isup = getJSON("http://isitup.org/%s.json" % o.netloc)
|
||||||
if isup is not None and "status_code" in isup and isup["status_code"] == 1:
|
if isup is not None and "status_code" in isup and isup["status_code"] == 1:
|
||||||
|
@ -21,7 +21,7 @@ def headers(url):
|
|||||||
url -- the page URL to get header
|
url -- the page URL to get header
|
||||||
"""
|
"""
|
||||||
|
|
||||||
o = urllib.parse.urlparse(url, "http")
|
o = urllib.parse.urlparse(web.getNormalizedURL(url), "http")
|
||||||
if o.netloc == "":
|
if o.netloc == "":
|
||||||
raise IRCException("invalid URL")
|
raise IRCException("invalid URL")
|
||||||
if o.scheme == "http":
|
if o.scheme == "http":
|
||||||
|
@ -3,6 +3,7 @@ import urllib
|
|||||||
|
|
||||||
from nemubot import __version__
|
from nemubot import __version__
|
||||||
from nemubot.exception import IRCException
|
from nemubot.exception import IRCException
|
||||||
|
from nemubot.tools.web import getNormalizedURL
|
||||||
|
|
||||||
def validator(url):
|
def validator(url):
|
||||||
"""Run the w3c validator on the given URL
|
"""Run the w3c validator on the given URL
|
||||||
@ -11,7 +12,7 @@ def validator(url):
|
|||||||
url -- the URL to validate
|
url -- the URL to validate
|
||||||
"""
|
"""
|
||||||
|
|
||||||
o = urllib.parse.urlparse(url, "http")
|
o = urllib.parse.urlparse(getNormalizedURL(url), "http")
|
||||||
if o.netloc == "":
|
if o.netloc == "":
|
||||||
raise IRCException("Indicate a valid URL!")
|
raise IRCException("Indicate a valid URL!")
|
||||||
|
|
||||||
|
@ -8,10 +8,10 @@ from urllib.parse import urlparse
|
|||||||
from nemubot.event import ModuleEvent
|
from nemubot.event import ModuleEvent
|
||||||
from nemubot.exception import IRCException
|
from nemubot.exception import IRCException
|
||||||
from nemubot.hooks import hook
|
from nemubot.hooks import hook
|
||||||
|
from nemubot.tools.web import getNormalizedURL
|
||||||
from nemubot.tools.xmlparser.node import ModuleState
|
from nemubot.tools.xmlparser.node import ModuleState
|
||||||
|
|
||||||
logger = logging.getLogger("nemubot.module.networking.watchWebsite")
|
logger = logging.getLogger("nemubot.module.networking.watchWebsite")
|
||||||
nemubotversion = 3.4
|
|
||||||
|
|
||||||
from more import Response
|
from more import Response
|
||||||
|
|
||||||
@ -56,7 +56,7 @@ def del_site(url, nick, channel, frm_owner):
|
|||||||
url -- URL to unwatch
|
url -- URL to unwatch
|
||||||
"""
|
"""
|
||||||
|
|
||||||
o = urlparse(url, "http")
|
o = urlparse(getNormalizedURL(url), "http")
|
||||||
if o.scheme != "" and url in DATAS.index:
|
if o.scheme != "" and url in DATAS.index:
|
||||||
site = DATAS.index[url]
|
site = DATAS.index[url]
|
||||||
for a in site.getNodes("alert"):
|
for a in site.getNodes("alert"):
|
||||||
@ -80,7 +80,7 @@ def add_site(url, nick, channel, server, diffType="diff"):
|
|||||||
url -- URL to watch
|
url -- URL to watch
|
||||||
"""
|
"""
|
||||||
|
|
||||||
o = urlparse(url, "http")
|
o = urlparse(getNormalizedURL(url), "http")
|
||||||
if o.netloc == "":
|
if o.netloc == "":
|
||||||
raise IRCException("sorry, I can't watch this URL :(")
|
raise IRCException("sorry, I can't watch this URL :(")
|
||||||
|
|
||||||
@ -210,7 +210,7 @@ def start_watching(site, offset=0):
|
|||||||
offset -- offset time to delay the launch of the first check
|
offset -- offset time to delay the launch of the first check
|
||||||
"""
|
"""
|
||||||
|
|
||||||
o = urlparse(site["url"], "http")
|
o = urlparse(getNormalizedURL(site["url"]), "http")
|
||||||
#print_debug("Add %s event for site: %s" % (site["type"], o.netloc))
|
#print_debug("Add %s event for site: %s" % (site["type"], o.netloc))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -3,7 +3,7 @@ import re, json, subprocess
|
|||||||
|
|
||||||
from nemubot.exception import IRCException
|
from nemubot.exception import IRCException
|
||||||
from nemubot.hooks import hook
|
from nemubot.hooks import hook
|
||||||
from nemubot.tools.web import getURLContent
|
from nemubot.tools.web import _getNormalizedURL, getURLContent
|
||||||
from more import Response
|
from more import Response
|
||||||
|
|
||||||
"""Get information of youtube videos"""
|
"""Get information of youtube videos"""
|
||||||
@ -85,7 +85,7 @@ def parseresponse(msg):
|
|||||||
if hasattr(msg, "text") and msg.text:
|
if hasattr(msg, "text") and msg.text:
|
||||||
urls = re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ :]+)", msg.text)
|
urls = re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ :]+)", msg.text)
|
||||||
for url in urls:
|
for url in urls:
|
||||||
o = urlparse(url)
|
o = urlparse(_getNormalizedURL(url))
|
||||||
if o.scheme != "":
|
if o.scheme != "":
|
||||||
if o.netloc == "" and len(o.path) < 10:
|
if o.netloc == "" and len(o.path) < 10:
|
||||||
continue
|
continue
|
||||||
|
@ -23,13 +23,17 @@ from nemubot.exception import IRCException
|
|||||||
|
|
||||||
def isURL(url):
|
def isURL(url):
|
||||||
"""Return True if the URL can be parsed"""
|
"""Return True if the URL can be parsed"""
|
||||||
o = urlparse(url)
|
o = urlparse(_getNormalizedURL(url))
|
||||||
return o.netloc == "" and o.path == ""
|
return o.netloc == "" and o.path == ""
|
||||||
|
|
||||||
|
|
||||||
|
def _getNormalizedURL(url):
|
||||||
|
"""Return a light normalized form for the given URL"""
|
||||||
|
return url if "//" in url else "//" + url
|
||||||
|
|
||||||
def getNormalizedURL(url):
|
def getNormalizedURL(url):
|
||||||
"""Return a normalized form for the given URL"""
|
"""Return a normalized form for the given URL"""
|
||||||
return urlunsplit(urlsplit(url, "http"))
|
return urlunsplit(urlsplit(_getNormalizedURL(url), "http"))
|
||||||
|
|
||||||
|
|
||||||
def getScheme(url):
|
def getScheme(url):
|
||||||
@ -40,27 +44,27 @@ def getScheme(url):
|
|||||||
|
|
||||||
def getHost(url):
|
def getHost(url):
|
||||||
"""Return the domain of a given URL"""
|
"""Return the domain of a given URL"""
|
||||||
return urlparse(url, "http").hostname
|
return urlparse(_getNormalizedURL(url), "http").hostname
|
||||||
|
|
||||||
|
|
||||||
def getPort(url):
|
def getPort(url):
|
||||||
"""Return the port of a given URL"""
|
"""Return the port of a given URL"""
|
||||||
return urlparse(url, "http").port
|
return urlparse(_getNormalizedURL(url), "http").port
|
||||||
|
|
||||||
|
|
||||||
def getPath(url):
|
def getPath(url):
|
||||||
"""Return the page request of a given URL"""
|
"""Return the page request of a given URL"""
|
||||||
return urlparse(url, "http").path
|
return urlparse(_getNormalizedURL(url), "http").path
|
||||||
|
|
||||||
|
|
||||||
def getUser(url):
|
def getUser(url):
|
||||||
"""Return the page request of a given URL"""
|
"""Return the page request of a given URL"""
|
||||||
return urlparse(url, "http").username
|
return urlparse(_getNormalizedURL(url), "http").username
|
||||||
|
|
||||||
|
|
||||||
def getPassword(url):
|
def getPassword(url):
|
||||||
"""Return the page request of a given URL"""
|
"""Return the page request of a given URL"""
|
||||||
return urlparse(url, "http").password
|
return urlparse(_getNormalizedURL(url), "http").password
|
||||||
|
|
||||||
|
|
||||||
# Get real pages
|
# Get real pages
|
||||||
@ -74,7 +78,7 @@ def getURLContent(url, body=None, timeout=7, header=None):
|
|||||||
timeout -- maximum number of seconds to wait before returning an exception
|
timeout -- maximum number of seconds to wait before returning an exception
|
||||||
"""
|
"""
|
||||||
|
|
||||||
o = urlparse(url, "http")
|
o = urlparse(_getNormalizedURL(url), "http")
|
||||||
|
|
||||||
import http.client
|
import http.client
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user