From dcb44ca3f24b111de4718db7db5fd652ef9abdc1 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Wed, 2 Aug 2017 19:58:49 +0200 Subject: [PATCH] tools/web: new parameter to choose max content size to retrieve --- nemubot/tools/web.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nemubot/tools/web.py b/nemubot/tools/web.py index 0394aac..164f5da 100644 --- a/nemubot/tools/web.py +++ b/nemubot/tools/web.py @@ -68,7 +68,8 @@ def getPassword(url): # Get real pages -def getURLContent(url, body=None, timeout=7, header=None, decode_error=False): +def getURLContent(url, body=None, timeout=7, header=None, decode_error=False, + max_size=524288): """Return page content corresponding to URL or None if any error occurs Arguments: @@ -76,6 +77,7 @@ def getURLContent(url, body=None, timeout=7, header=None, decode_error=False): body -- Data to send as POST content timeout -- maximum number of seconds to wait before returning an exception decode_error -- raise exception on non-200 pages or ignore it + max_size -- maximum size (in bytes) allowed for the content """ o = urlparse(_getNormalizedURL(url), "http") @@ -135,7 +137,7 @@ def getURLContent(url, body=None, timeout=7, header=None, decode_error=False): size = int(res.getheader("Content-Length", 524288)) cntype = res.getheader("Content-Type") - if size > 524288 or (cntype is not None and cntype[:4] != "text" and cntype[:4] != "appl"): + if size > max_size or (cntype is not None and cntype[:4] != "text" and cntype[:4] != "appl"): raise IMException("Content too large to be retrieved") data = res.read(size) @@ -168,7 +170,8 @@ def getURLContent(url, body=None, timeout=7, header=None, decode_error=False): body=body, timeout=timeout, header=header, - decode_error=decode_error) + decode_error=decode_error, + max_size=max_size) elif decode_error: return data.decode(charset).strip() else: