Tools.Web: fix charset detection on webpages
This commit is contained in:
parent
4be9f78104
commit
fc500bc853
1 changed file with 2 additions and 2 deletions
|
@@ -118,7 +118,7 @@ def getURLContent(url, timeout=15):
|
|||
if cntype is not None:
|
||||
lcharset = res.getheader("Content-Type").split(";")
|
||||
if len(lcharset) > 1:
|
||||
for c in charset:
|
||||
for c in lcharset:
|
||||
ch = c.split("=")
|
||||
if ch[0].strip().lower() == "charset" and len(ch) > 1:
|
||||
cha = ch[1].split(".")
|
||||
|
@@ -132,7 +132,7 @@ def getURLContent(url, timeout=15):
|
|||
conn.close()
|
||||
|
||||
if res.status == http.client.OK or res.status == http.client.SEE_OTHER:
|
||||
return data.decode(charset)
|
||||
return data.decode(charset).strip()
|
||||
elif ((res.status == http.client.FOUND or
|
||||
res.status == http.client.MOVED_PERMANENTLY) and
|
||||
res.getheader("Location") != url):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue