Fix decoding of some pages

2014-07-08 02:44:20 +02:00 · 2014-07-08 02:44:20 +02:00 · fa77a3b323
commit fa77a3b323
parent 63f24c7b59
2 changed files with 5 additions and 5 deletions
--- a/modules/syno.py
+++ b/modules/syno.py
@@ -52,7 +52,7 @@ def get_synos(word):
    page = web.getURLContent(url)
    if page is not None:
        synos = list()
-        for line in page.decode().split("\n"):
+        for line in page.split("\n"):
            if re.match("[ \t]*<tr[^>]*>.*</tr>[ \t]*</table>.*", line) is not None:
                for elt in re.finditer(">&[^;]+;([^&]*)&[^;]+;<", line):
                    synos.append(elt.group(1))
--- a/tools/web.py
+++ b/tools/web.py
@@ -95,8 +95,9 @@ def getURLContent(url, timeout=15):
        data = res.read(size)

        # Decode content
-        charset = res.getheader("Content-Type").split(";")
-        if len(charset) > 1:
+        charset = "utf-8"
+        lcharset = res.getheader("Content-Type").split(";")
+        if len(lcharset) > 1:
            for c in charset:
                ch = c.split("=")
                if ch[0].strip().lower() == "charset" and len(ch) > 1:
@@ -105,14 +106,13 @@ def getURLContent(url, timeout=15):
                        charset = cha[1]
                    else:
                        charset = cha[0]
-                    data = data.decode(charset)
    except http.client.BadStatusLine:
        return None
    finally:
        conn.close()

    if res.status == http.client.OK or res.status == http.client.SEE_OTHER:
-        return data
+        return data.decode(charset)
    elif res.status == http.client.FOUND or res.status == http.client.MOVED_PERMANENTLY:
        return getURLContent(res.getheader("Location"), timeout)
    else: