[networking] fix watch pages that aren't text/html

nemunaire 2015-07-01 18:15:35 +02:00
parent 487cb13e14
commit 0c960e984a
2 changed files with 26 additions and 11 deletions

View File

@@ -74,6 +74,20 @@ def fetch(url, onNone=_onNoneDefault):
         raise IRCException(e.strerror)
 
 
+def _render(cnt):
+    """Render the page contained in cnt as HTML page"""
+    if cnt is None:
+        return None
+
+    with tempfile.NamedTemporaryFile() as fp:
+        fp.write(cnt.encode())
+
+        args = ["w3m", "-T", "text/html", "-dump"]
+        args.append(fp.name)
+
+        with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
+            return proc.stdout.read().decode()
+
 
 def render(url, onNone=_onNoneDefault):
     """Use w3m to render the given url
@@ -81,16 +95,7 @@ def render(url, onNone=_onNoneDefault):
     url -- the URL to render
     """
 
-    with tempfile.NamedTemporaryFile() as fp:
-        cnt = fetch(url, onNone)
-        if cnt is None:
-            return None
-        fp.write(cnt.encode())
-
-        args = ["w3m", "-T", "text/html", "-dump"]
-        args.append(fp.name)
-
-        with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
-            return proc.stdout.read().decode()
+    return _render(fetch(url, onNone))
 
 
 def traceURL(url, stack=None):
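
With this refactoring, render() is simply the composition of fetch() and the new _render() helper, so code that already holds a page body can turn it into text without fetching the URL a second time. A minimal usage sketch (the URL below is a placeholder, not something from this commit):

    cnt = fetch("http://example.org/page")  # raw body: HTML, JSON, plain text, or None
    txt = _render(cnt)                      # w3m dump of that body, or None if cnt is None
    # render("http://example.org/page") now performs exactly these two steps.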

View File

@@ -176,6 +176,16 @@ def alert_change(content, site):
     save()
 
 
+def fwatch(url):
+    cnt = page.fetch(url, None)
+    if cnt is not None:
+        render = page._render(cnt)
+        if render is None or render == "":
+            return cnt
+        return render
+    return None
+
+
 def start_watching(site, offset=0):
     """Launch the event watching given site
@@ -190,7 +200,7 @@ def start_watching(site, offset=0):
     #print_debug("Add %s event for site: %s" % (site["type"], o.netloc))
     try:
-        evt = ModuleEvent(func=lambda url: page.render(url, None),
+        evt = ModuleEvent(func=fwatch,
                           cmp_data=site["lastcontent"],
                           func_data=site["url"], offset=offset,
                           interval=site.getInt("time"),
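
Passing fwatch as the event's func (instead of a lambda around page.render) means each periodic check now compares meaningful content even for sites that are not text/html. A rough sketch of what one check presumably amounts to, assuming the usual ModuleEvent contract of calling func(func_data) and comparing the result with cmp_data (the event machinery itself is not part of this diff):

    current = fwatch(site["url"])                                # func applied to func_data
    if current is not None and current != site["lastcontent"]:  # differs from cmp_data
        alert_change(current, site)                              # change handler defined earlier in this file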