striphtml: also convert &#39; to ´ and collapse multiple spaces, as HTML rendering does

This commit is contained in:
nemunaire 2015-09-13 11:18:07 +02:00
parent 9fa8902f1a
commit 8988dd0d41

View File

@@ -199,7 +199,10 @@ def striphtml(data):
import re import re
p = re.compile(r'<.*?>') p = re.compile(r'<.*?>')
return htmlentitydecode(p.sub('', data) r, _ = re.subn(r' +', ' ', htmlentitydecode(p.sub('', data)
.replace("&#x28;", "/(") .replace("&#x28;", "/(")
.replace("&#x29;", ")/") .replace("&#x29;", ")/")
.replace("&#x22;", "\"")) .replace("&#39;", "´")
.replace("&#x22;", "\""))
.replace('\n', ' '))
return r