Webtool: New function to decode htmlentities
This commit is contained in:
parent
509e85f55a
commit
0f01d28528
@ -16,6 +16,7 @@
|
|||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from html.entities import name2codepoint
|
||||||
import http.client
|
import http.client
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
@ -136,7 +137,12 @@ def getJSON(url, timeout=15):
|
|||||||
|
|
||||||
# Other utils
|
# Other utils
|
||||||
|
|
||||||
|
def htmlentitydecode(s):
    """Decode HTML entities found in ``s``.

    Handles named entities (``&amp;``, ``&eacute;``, ...) known to
    ``html.entities.name2codepoint`` as well as decimal (``&#39;``) and
    hexadecimal (``&#x27;``) numeric character references.  Only references
    terminated by ``;`` are considered.  Anything that cannot be resolved
    (unknown name, code point out of range) is left untouched.

    Decoding is done in a single pass: ``&amp;lt;`` yields ``&lt;``.

    Argument:
    s -- the string to decode

    Returns the decoded string.
    """

    def _decode(match):
        ref = match.group(1)
        try:
            if ref[0] == '#':
                # Numeric character reference, hexadecimal or decimal
                if ref[1] in 'xX':
                    return chr(int(ref[2:], 16))
                return chr(int(ref[1:]))
            return chr(name2codepoint[ref])
        except (KeyError, ValueError, OverflowError):
            # Unknown entity name or invalid code point: keep the text as is
            return match.group(0)

    # A small generic pattern plus a dict lookup avoids rebuilding a huge
    # alternation of every known entity name on each call.
    return re.sub(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);',
                  _decode, s)
|
||||||
|
|
||||||
def striphtml(data):
    """Remove HTML tags from text and decode its HTML entities.

    Steps, in order:
    1. drop everything matching ``<.*?>`` (non-greedy, so each tag is
       removed separately; ``.`` does not match newlines, so a tag spanning
       several lines is left in place);
    2. rewrite ``(`` as ``/(`` and ``)`` as ``)/``
       (NOTE(review): presumably a markup convention expected by the
       callers -- confirm);
    3. replace ``&quot;`` by a double quote;
    4. decode the remaining HTML entities with htmlentitydecode().

    Argument:
    data -- the HTML string to clean

    Returns the cleaned string.
    """
    text = re.sub(r'<.*?>', '', data)
    text = text.replace("(", "/(").replace(")", ")/")
    # The literal must be the entity itself: a bare quote character here
    # would open a triple-quoted string and break the module.
    text = text.replace("&quot;", "\"")
    return htmlentitydecode(text)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user