From fa77a3b323ada286f909b0da7a7921490380528d Mon Sep 17 00:00:00 2001 From: nemunaire Date: Tue, 8 Jul 2014 02:44:20 +0200 Subject: [PATCH] Fix decoding of some pages --- modules/syno.py | 2 +- tools/web.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/syno.py b/modules/syno.py index 047fe03..2d0a98a 100644 --- a/modules/syno.py +++ b/modules/syno.py @@ -52,7 +52,7 @@ def get_synos(word): page = web.getURLContent(url) if page is not None: synos = list() - for line in page.decode().split("\n"): + for line in page.split("\n"): if re.match("[ \t]*]*>.*[ \t]*.*", line) is not None: for elt in re.finditer(">&[^;]+;([^&]*)&[^;]+;<", line): synos.append(elt.group(1)) diff --git a/tools/web.py b/tools/web.py index 4266158..88a52ff 100644 --- a/tools/web.py +++ b/tools/web.py @@ -95,8 +95,9 @@ def getURLContent(url, timeout=15): data = res.read(size) # Decode content - charset = res.getheader("Content-Type").split(";") - if len(charset) > 1: + charset = "utf-8" + lcharset = res.getheader("Content-Type").split(";") + if len(lcharset) > 1: for c in charset: ch = c.split("=") if ch[0].strip().lower() == "charset" and len(ch) > 1: @@ -105,14 +106,13 @@ def getURLContent(url, timeout=15): charset = cha[1] else: charset = cha[0] - data = data.decode(charset) except http.client.BadStatusLine: return None finally: conn.close() if res.status == http.client.OK or res.status == http.client.SEE_OTHER: - return data + return data.decode(charset) elif res.status == http.client.FOUND or res.status == http.client.MOVED_PERMANENTLY: return getURLContent(res.getheader("Location"), timeout) else: