Tools.web: use urllib instead of custom urlparser
This commit is contained in:
parent
2fc9c89c9e
commit
a11c617ade
2 changed files with 21 additions and 103 deletions
|
@ -26,8 +26,10 @@ def get_courses(classe=None, room=None, teacher=None, date=None):
|
||||||
url += "&teacher=" + quote(teacher)
|
url += "&teacher=" + quote(teacher)
|
||||||
#TODO: date, not implemented at 23.tf
|
#TODO: date, not implemented at 23.tf
|
||||||
|
|
||||||
|
print_debug(url)
|
||||||
response = web.getXML(url)
|
response = web.getXML(url)
|
||||||
if response is not None:
|
if response is not None:
|
||||||
|
print_debug(response)
|
||||||
return response.getNodes("course")
|
return response.getNodes("course")
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
@ -55,6 +57,7 @@ def cmd_chronos(msg):
|
||||||
res = Response(msg.sender, channel=msg.channel, nomore="Je n'ai pas d'autre cours à afficher")
|
res = Response(msg.sender, channel=msg.channel, nomore="Je n'ai pas d'autre cours à afficher")
|
||||||
|
|
||||||
courses = get_courses(classe)
|
courses = get_courses(classe)
|
||||||
|
print_debug(courses)
|
||||||
if courses is not None:
|
if courses is not None:
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
tomorrow = now + timedelta(days=1)
|
tomorrow = now + timedelta(days=1)
|
||||||
|
|
121
tools/web.py
121
tools/web.py
|
@ -21,91 +21,57 @@ import json
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
import xmlparser
|
import xmlparser
|
||||||
|
|
||||||
# Parse URL
|
|
||||||
|
|
||||||
def parseURL(url):
    """Separate protocol, credentials, domain, port and page request.

    Returns a tuple ``(scheme, (user, password), host, port, path)``:

    * URL without any ``scheme:`` prefix: every member is None.
    * Opaque URL (no ``//`` after the scheme, e.g. ``magnet:...``), or an
      authority part that cannot be understood: only ``scheme`` and
      ``path`` are filled, ``host`` and ``port`` are None.
    * Otherwise all members are filled; ``port`` falls back to the
      well-known port of the scheme (0 when the scheme is unknown) and
      ``path`` to "/" when the URL has no path at all.
    """
    res = re.match("^([a-zA-Z0-9+.-]+):(//)?(.*)$", url)
    if res is None:
        return (None, (None, None), None, None, None)

    scheme = res.group(1)
    rest = res.group(3)
    if res.group(2) != "//":
        # Opaque URL: there is nothing to split further
        return (scheme, (None, None), None, None, rest)

    # Default port of the schemes we know about, 0 otherwise
    port = {"https": 443, "http": 80, "ftp": 21, "telnet": 23}.get(scheme, 0)

    # Waiting Common Internet Scheme Syntax
    ciss_re = "^(([^:]+)(:([^@]+))@)?([^:/]+)(:([0-9]+))?(/.*)$"
    # The pattern requires a path: retry with "/" appended so that
    # "http://host" is understood like "http://host/"
    ciss = re.match(ciss_re, rest) or re.match(ciss_re, rest + "/")
    if ciss is None:
        # Unintelligible authority: report it like an opaque URL instead
        # of crashing on ciss.group()
        return (scheme, (None, None), None, None, rest)

    # An explicit port only overrides the default when strictly positive
    if ciss.group(7) and int(ciss.group(7)) > 0:
        port = int(ciss.group(7))

    return (scheme, (ciss.group(2), ciss.group(4)), ciss.group(5), port,
            ciss.group(8))
|
|
||||||
|
|
||||||
def isURL(url):
    """Return True if the URL can be parsed

    A string is considered a URL as soon as it carries a scheme
    ("http://host/", "magnet:...").  Strings without scheme, the empty
    string and strings rejected by urlparse all give False.
    """
    try:
        o = urlparse(url)
    except ValueError:
        # urlparse refuses some malformed inputs (e.g. unbalanced IPv6
        # brackets): such a string is simply not a URL
        return False
    return bool(o.scheme)
|
||||||
|
|
||||||
def getScheme(url):
    """Return the protocol of a given URL

    The scheme is given as parsed by urlparse; the result is the empty
    string when the URL carries no scheme.
    """
    return urlparse(url).scheme
|
||||||
|
|
||||||
def getHost(url):
    """Return the domain of a given URL

    Only the host name is returned: credentials and port found in the
    network location are left out.  The result is None when the URL has
    no host part.
    """
    # netloc would be "user:password@host:port"; hostname is the bare host
    return urlparse(url).hostname
|
|
||||||
|
|
||||||
def getPort(url):
    """Return the port of a given URL

    The explicit port is returned when the URL has one.  Otherwise the
    well-known port of the scheme is used (http, https, ftp, telnet);
    the result is None when the scheme has no known default.

    urlparse raises ValueError when the port is not a valid number.
    """
    o = urlparse(url)
    port = o.port
    # A missing port, as well as port 0, falls back to the scheme default
    if port:
        return port
    return {"https": 443, "http": 80, "ftp": 21, "telnet": 23}.get(o.scheme)
|
|
||||||
|
|
||||||
def getPath(url):
    """Return the page request of a given URL

    The page request is the path followed, when there is one, by "?" and
    the query string; this is what has to be sent to the server.
    """
    o = urlparse(url)
    if o.query != '':
        # urlparse splits the query apart: put it back in the request
        return o.path + "?" + o.query
    return o.path
|
|
||||||
|
|
||||||
def getUser(url):
    """Return the user name of a given URL

    The result is None when the URL carries no credentials.
    """
    return urlparse(url).username
|
|
||||||
def getPassword(url):
    """Return the password of a given URL

    The result is None when the URL carries no password.
    """
    return urlparse(url).password
|
|
||||||
|
|
||||||
|
|
||||||
# Get real pages
|
# Get real pages
|
||||||
|
|
||||||
def getURLContent(url, timeout=15):
|
def getURLContent(url, timeout=15):
|
||||||
"""Return page content corresponding to URL or None if any error occurs"""
|
"""Return page content corresponding to URL or None if any error occurs"""
|
||||||
(scheme, (user, password), host, port, path) = parseURL(url)
|
o = urlparse(url)
|
||||||
if port is None:
|
conn = http.client.HTTPConnection(o.netloc, port=o.port, timeout=timeout)
|
||||||
return None
|
|
||||||
conn = http.client.HTTPConnection(host, port=port, timeout=timeout)
|
|
||||||
try:
|
try:
|
||||||
conn.request("GET", path, None, {"User-agent": "Nemubot v3"})
|
if o.query != '':
|
||||||
|
conn.request("GET", o.path + "?" + o.query, None, {"User-agent": "Nemubot v3"})
|
||||||
|
else:
|
||||||
|
conn.request("GET", o.path, None, {"User-agent": "Nemubot v3"})
|
||||||
except socket.timeout:
|
except socket.timeout:
|
||||||
return None
|
return None
|
||||||
except socket.gaierror:
|
except socket.gaierror:
|
||||||
print ("<tools.web> Unable to receive page %s from %s on %d."
|
print ("<tools.web> Unable to receive page %s from %s on %d."
|
||||||
% (path, host, port))
|
% (o.path, o.netloc, o.port))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -145,60 +111,9 @@ def getJSON(url, timeout=15):
|
||||||
else:
|
else:
|
||||||
return json.loads(cnt.decode())
|
return json.loads(cnt.decode())
|
||||||
|
|
||||||
def traceURL(url, timeout=5, stack=None):
    """Follow redirections and return the redirections stack

    url is the address to start from, timeout the number of seconds
    given to each request and stack the list of addresses already
    visited (a new list is created when it is None).

    Returns the list of visited addresses.  "Timeout" is appended to it
    when a request times out; when a redirection loops, the address
    already seen is appended once more before stopping.  Returns None
    if the host cannot be resolved, if the server answers garbage or if
    the final status is neither 200 nor a followable redirection.
    """
    if stack is None:
        stack = list()
    stack.append(url)

    (scheme, (user, password), host, port, path) = parseURL(url)
    if port is None or port == 0:
        # Nothing we can connect to: stop here
        return stack

    conn = http.client.HTTPConnection(host, port=port, timeout=timeout)
    try:
        # Request and answer share one try block, so that a timeout
        # while waiting for the answer is reported like a timeout while
        # sending, and the connection is closed on every path
        conn.request("HEAD", path, None, {"User-agent": "Nemubot v3"})
        res = conn.getresponse()
    except socket.timeout:
        stack.append("Timeout")
        return stack
    except socket.gaierror:
        print("<tools.web> Unable to receive page %s from %s on %d."
              % (path, host, port))
        return None
    except http.client.BadStatusLine:
        return None
    finally:
        conn.close()

    if res.status == http.client.OK:
        return stack

    if res.status in (http.client.FOUND, http.client.MOVED_PERMANENTLY,
                      http.client.SEE_OTHER):
        url = res.getheader("Location")
        if url is None:
            # Redirection without target: nothing to follow
            return None
        if url in stack:
            # Redirection loop: record it and stop
            stack.append(url)
            return stack
        return traceURL(url, timeout, stack)

    return None
|
|
||||||
|
|
||||||
|
|
||||||
# Other utils
|
# Other utils
|
||||||
|
|
||||||
def striphtml(data):
    """Remove HTML tags from text

    Everything between "<" and the next ">" is dropped.  Parentheses are
    then wrapped with slashes ("(" becomes "/(" and ")" becomes ")/")
    and the &quot; entity is turned back into a double quote.
    """
    p = re.compile(r'<.*?>')
    return p.sub('', data).replace("(", "/(").replace(")", ")/").replace("&quot;", "\"")
|
||||||
|
|
||||||
|
|
||||||
# Tests when called alone
|
|
||||||
if __name__ == "__main__":
    # Print the parsed form of a few sample addresses, one per line
    for sample in ("http://www.nemunai.re/",
                   "http://www.nemunai.re/?p0m",
                   "http://www.nemunai.re/?p0m",
                   "http://www.nemunai.re:42/?p0m",
                   "ftp://www.nemunai.re:42/?p0m",
                   "http://www.nemunai.re/?p0m",
                   "magnet:ceciestunmagnet!"):
        print(parseURL(sample))
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue