Fix SNCF scraper

This commit is contained in:
nemunaire 2022-12-30 20:25:26 +01:00
parent f79022b7a3
commit 9db00b8cd8

View File

@@ -1,6 +1,7 @@
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
import base64 import base64
import json import json
import logging
import os import os
import urllib.error import urllib.error
import urllib.parse import urllib.parse
@@ -42,7 +43,13 @@ class SNCFAPI:
if statinfo is None or datetime.fromtimestamp(statinfo.st_mtime, tz=timezone.utc) + timedelta(minutes=self.cache_timeout) < datetime.now(tz=timezone.utc): if statinfo is None or datetime.fromtimestamp(statinfo.st_mtime, tz=timezone.utc) + timedelta(minutes=self.cache_timeout) < datetime.now(tz=timezone.utc):
# Do the request and save it # Do the request and save it
req = urllib.request.Request(self.baseurl + "/1.0/infoVoy/rechercherListeCirculations?numero=%d&dateCirculation=%s&codeZoneArret&typeHoraire=TEMPS_REEL" % (int(numero), date.strftime("%Y-%m-%d")), headers={'Authorization': "Basic " + self.auth}) req = urllib.request.Request(self.baseurl + "/1.0/infoVoy/rechercherListeCirculations?numero=%d&dateCirculation=%s&codeZoneArret&typeHoraire=TEMPS_REEL" % (int(numero), date.strftime("%Y-%m-%d")),
headers={
'Authorization': "Basic " + self.auth,
'Accept': "application/json",
'Accept-Language': "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
})
try: try:
with urllib.request.urlopen(req) as f: with urllib.request.urlopen(req) as f:
with open(cache_file, 'wb') as fd: with open(cache_file, 'wb') as fd:
@@ -81,14 +88,25 @@ class SNCFAPI:
if statinfo is None or datetime.fromtimestamp(statinfo.st_mtime, tz=timezone.utc) + timedelta(minutes=self.cache_timeout) < datetime.now(tz=timezone.utc): if statinfo is None or datetime.fromtimestamp(statinfo.st_mtime, tz=timezone.utc) + timedelta(minutes=self.cache_timeout) < datetime.now(tz=timezone.utc):
# Do the request and save it # Do the request and save it
req = urllib.request.Request(self.baseurl + "/edito/bandeaux?region=%s" % (region), headers={'Authorization': "Basic " + self.auth}) req = urllib.request.Request(self.baseurl + "/edito/bandeaux?region=%s" % (region),
headers={
'Authorization': "Basic " + self.auth,
'Accept': "application/json",
'Accept-Language': "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
})
try: try:
with urllib.request.urlopen(req) as f: with urllib.request.urlopen(req, timeout=3) as f:
with open(cache_file, 'wb') as fd: with open(cache_file, 'wb') as fd:
fd.write(f.read()) fd.write(f.read())
except ConnectionResetError: except ConnectionResetError as e:
logging.exception(e)
pass pass
except urllib.error.URLError: except urllib.error.URLError as e:
logging.exception(e)
pass
except TimeoutError as e:
logging.exception(e)
pass pass
try: try: