tools/web: split getURLContent function
This commit is contained in:
parent
3b99099b52
commit
5578e8b86e
@ -68,18 +68,7 @@ def getPassword(url):
|
|||||||
|
|
||||||
# Get real pages
|
# Get real pages
|
||||||
|
|
||||||
def getURLContent(url, body=None, timeout=7, header=None, decode_error=False,
|
def _URLConn(cb, url, body=None, timeout=7, header=None):
|
||||||
max_size=524288):
|
|
||||||
"""Return page content corresponding to URL or None if any error occurs
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
url -- the URL to get
|
|
||||||
body -- Data to send as POST content
|
|
||||||
timeout -- maximum number of seconds to wait before returning an exception
|
|
||||||
decode_error -- raise exception on non-200 pages or ignore it
|
|
||||||
max_size -- maximum size allowed for the content
|
|
||||||
"""
|
|
||||||
|
|
||||||
o = urlparse(_getNormalizedURL(url), "http")
|
o = urlparse(_getNormalizedURL(url), "http")
|
||||||
|
|
||||||
import http.client
|
import http.client
|
||||||
@ -134,6 +123,27 @@ def getURLContent(url, body=None, timeout=7, header=None, decode_error=False,
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
res = conn.getresponse()
|
res = conn.getresponse()
|
||||||
|
return cb(res)
|
||||||
|
except http.client.BadStatusLine:
|
||||||
|
raise IMException("Invalid HTTP response")
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
def getURLContent(url, body=None, timeout=7, header=None, decode_error=False,
|
||||||
|
max_size=524288):
|
||||||
|
"""Return page content corresponding to URL or None if any error occurs
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
url -- the URL to get
|
||||||
|
body -- Data to send as POST content
|
||||||
|
timeout -- maximum number of seconds to wait before returning an exception
|
||||||
|
decode_error -- raise exception on non-200 pages or ignore it
|
||||||
|
max_size -- maximum size allowed for the content
|
||||||
|
"""
|
||||||
|
|
||||||
|
import http.client
|
||||||
|
|
||||||
|
def next(res):
|
||||||
size = int(res.getheader("Content-Length", 524288))
|
size = int(res.getheader("Content-Length", 524288))
|
||||||
cntype = res.getheader("Content-Type")
|
cntype = res.getheader("Content-Type")
|
||||||
|
|
||||||
@ -155,10 +165,6 @@ def getURLContent(url, body=None, timeout=7, header=None, decode_error=False,
|
|||||||
charset = cha[1]
|
charset = cha[1]
|
||||||
else:
|
else:
|
||||||
charset = cha[0]
|
charset = cha[0]
|
||||||
except http.client.BadStatusLine:
|
|
||||||
raise IMException("Invalid HTTP response")
|
|
||||||
finally:
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
if res.status == http.client.OK or res.status == http.client.SEE_OTHER:
|
if res.status == http.client.OK or res.status == http.client.SEE_OTHER:
|
||||||
return data.decode(charset).strip()
|
return data.decode(charset).strip()
|
||||||
@ -177,6 +183,7 @@ def getURLContent(url, body=None, timeout=7, header=None, decode_error=False,
|
|||||||
else:
|
else:
|
||||||
raise IMException("A HTTP error occurs: %d - %s" %
|
raise IMException("A HTTP error occurs: %d - %s" %
|
||||||
(res.status, http.client.responses[res.status]))
|
(res.status, http.client.responses[res.status]))
|
||||||
|
return _URLConn(next, url=url, body=body, timeout=timeout)
|
||||||
|
|
||||||
|
|
||||||
def getXML(*args, **kwargs):
|
def getXML(*args, **kwargs):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user