Fix SNCF scraper

This commit is contained in:
nemunaire 2022-12-30 20:25:26 +01:00
parent f79022b7a3
commit 9db00b8cd8

View File

@ -1,6 +1,7 @@
from datetime import datetime, timedelta, timezone
import base64
import json
import logging
import os
import urllib.error
import urllib.parse
@ -42,7 +43,13 @@ class SNCFAPI:
if statinfo is None or datetime.fromtimestamp(statinfo.st_mtime, tz=timezone.utc) + timedelta(minutes=self.cache_timeout) < datetime.now(tz=timezone.utc):
# Do the request and save it
req = urllib.request.Request(self.baseurl + "/1.0/infoVoy/rechercherListeCirculations?numero=%d&dateCirculation=%s&codeZoneArret&typeHoraire=TEMPS_REEL" % (int(numero), date.strftime("%Y-%m-%d")), headers={'Authorization': "Basic " + self.auth})
req = urllib.request.Request(self.baseurl + "/1.0/infoVoy/rechercherListeCirculations?numero=%d&dateCirculation=%s&codeZoneArret&typeHoraire=TEMPS_REEL" % (int(numero), date.strftime("%Y-%m-%d")),
headers={
'Authorization': "Basic " + self.auth,
'Accept': "application/json",
'Accept-Language': "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
})
try:
with urllib.request.urlopen(req) as f:
with open(cache_file, 'wb') as fd:
@ -81,14 +88,25 @@ class SNCFAPI:
if statinfo is None or datetime.fromtimestamp(statinfo.st_mtime, tz=timezone.utc) + timedelta(minutes=self.cache_timeout) < datetime.now(tz=timezone.utc):
# Do the request and save it
req = urllib.request.Request(self.baseurl + "/edito/bandeaux?region=%s" % (region), headers={'Authorization': "Basic " + self.auth})
req = urllib.request.Request(self.baseurl + "/edito/bandeaux?region=%s" % (region),
headers={
'Authorization': "Basic " + self.auth,
'Accept': "application/json",
'Accept-Language': "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
})
try:
with urllib.request.urlopen(req) as f:
with urllib.request.urlopen(req, timeout=3) as f:
with open(cache_file, 'wb') as fd:
fd.write(f.read())
except ConnectionResetError:
except ConnectionResetError as e:
logging.exception(e)
pass
except urllib.error.URLError:
except urllib.error.URLError as e:
logging.exception(e)
pass
except TimeoutError as e:
logging.exception(e)
pass
try: