tools/feed: hardened parser
This commit is contained in:
parent
04dcf07fb2
commit
7102e08000
@ -11,11 +11,16 @@ from xml.dom.minidom import getDOMImplementation
|
|||||||
class AtomEntry:
|
class AtomEntry:
|
||||||
|
|
||||||
def __init__(self, node):
|
def __init__(self, node):
|
||||||
|
if len(node.getElementsByTagName("id")) > 0 and node.getElementsByTagName("id")[0].firstChild is not None:
|
||||||
self.id = node.getElementsByTagName("id")[0].firstChild.nodeValue
|
self.id = node.getElementsByTagName("id")[0].firstChild.nodeValue
|
||||||
if node.getElementsByTagName("title")[0].firstChild is not None:
|
else:
|
||||||
|
self.id = None
|
||||||
|
|
||||||
|
if len(node.getElementsByTagName("title")) > 0 and node.getElementsByTagName("title")[0].firstChild is not None:
|
||||||
self.title = node.getElementsByTagName("title")[0].firstChild.nodeValue
|
self.title = node.getElementsByTagName("title")[0].firstChild.nodeValue
|
||||||
else:
|
else:
|
||||||
self.title = ""
|
self.title = ""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.updated = time.strptime(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:19], "%Y-%m-%dT%H:%M:%S")
|
self.updated = time.strptime(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:19], "%Y-%m-%dT%H:%M:%S")
|
||||||
except:
|
except:
|
||||||
@ -25,26 +30,32 @@ class AtomEntry:
|
|||||||
print(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10])
|
print(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10])
|
||||||
self.updated = time.localtime()
|
self.updated = time.localtime()
|
||||||
self.updated = datetime.datetime(*self.updated[:6])
|
self.updated = datetime.datetime(*self.updated[:6])
|
||||||
|
|
||||||
if len(node.getElementsByTagName("summary")) > 0 and node.getElementsByTagName("summary")[0].firstChild is not None:
|
if len(node.getElementsByTagName("summary")) > 0 and node.getElementsByTagName("summary")[0].firstChild is not None:
|
||||||
self.summary = node.getElementsByTagName("summary")[0].firstChild.nodeValue
|
self.summary = node.getElementsByTagName("summary")[0].firstChild.nodeValue
|
||||||
else:
|
else:
|
||||||
self.summary = None
|
self.summary = None
|
||||||
if len(node.getElementsByTagName("link")) > 0:
|
|
||||||
|
if len(node.getElementsByTagName("link")) > 0 and node.getElementsByTagName("link")[0].hasAttribute("href"):
|
||||||
self.link = node.getElementsByTagName("link")[0].getAttribute("href")
|
self.link = node.getElementsByTagName("link")[0].getAttribute("href")
|
||||||
else:
|
else:
|
||||||
self.link = None
|
self.link = None
|
||||||
if len(node.getElementsByTagName("category")) >= 1:
|
|
||||||
|
if len(node.getElementsByTagName("category")) >= 1 and node.getElementsByTagName("category")[0].hasAttribute("term"):
|
||||||
self.category = node.getElementsByTagName("category")[0].getAttribute("term")
|
self.category = node.getElementsByTagName("category")[0].getAttribute("term")
|
||||||
else:
|
else:
|
||||||
self.category = None
|
self.category = None
|
||||||
if len(node.getElementsByTagName("link")) > 1:
|
|
||||||
|
if len(node.getElementsByTagName("link")) > 1 and node.getElementsByTagName("link")[1].hasAttribute("href"):
|
||||||
self.link2 = node.getElementsByTagName("link")[1].getAttribute("href")
|
self.link2 = node.getElementsByTagName("link")[1].getAttribute("href")
|
||||||
else:
|
else:
|
||||||
self.link2 = None
|
self.link2 = None
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<AtomEntry title='%s' updated='%s'>" % (self.title, self.updated)
|
return "<AtomEntry title='%s' updated='%s'>" % (self.title, self.updated)
|
||||||
|
|
||||||
|
|
||||||
def __cmp__(self, other):
|
def __cmp__(self, other):
|
||||||
return not (self.id == other.id)
|
return not (self.id == other.id)
|
||||||
|
|
||||||
@ -52,26 +63,36 @@ class AtomEntry:
|
|||||||
class RSSEntry:
    """A single RSS item, populated from a DOM element.

    Attributes set by ``__init__``:
        id       -- text of the first ``<guid>``, or None when absent/empty.
        title    -- text of the first ``<title>``, or "" when absent/empty.
        pubDate  -- raw text of the first ``<pubDate>``, or "" when
                    absent/empty (the string is stored as-is, not parsed).
        summary  -- text of the first ``<description>``, or None.
        link     -- URL taken from the first ``<link>``, or None.
    """

    def __init__(self, node):
        """Read the entry fields from *node*.

        node -- a DOM element providing ``getElementsByTagName`` (the
                lookups are recursive, so descendants are matched too).

        Missing or empty elements never raise; each field falls back to the
        default listed in the class docstring.
        """
        self.id = self._first_text(node, "guid")
        self.title = self._first_text(node, "title", "")
        self.pubDate = self._first_text(node, "pubDate", "")
        self.summary = self._first_text(node, "description")
        self.link = self._first_link(node)

    @staticmethod
    def _first_text(node, tag, default=None):
        """Return the value of the first child of the first *tag* element.

        Returns *default* when no such element exists or it has no children.
        Only the first child node is consulted, so the result is None when
        that child is itself an element rather than text.
        """
        matches = node.getElementsByTagName(tag)
        if not matches:
            return default
        child = matches[0].firstChild
        if child is None:
            return default
        return child.nodeValue

    @staticmethod
    def _first_link(node):
        """Return the URL of the first ``<link>`` element, or None.

        An ``href`` attribute wins when present.  Otherwise the element's
        text is used, because RSS 2.0 carries the item URL as the content of
        ``<link>`` rather than in an attribute.
        """
        links = node.getElementsByTagName("link")
        if not links:
            return None
        first = links[0]
        if first.hasAttribute("href"):
            return first.getAttribute("href")
        if first.firstChild is not None:
            return first.firstChild.nodeValue
        return None

    def __repr__(self):
        return "<RSSEntry title='%s' updated='%s'>" % (self.title, self.pubDate)

    def __eq__(self, other):
        """Entries are equal when their ids are equal.

        Python 3 never calls ``__cmp__``, so without this method ``==`` and
        ``in`` would compare by object identity.  Any object exposing an
        ``id`` attribute is comparable, mirroring what ``__cmp__`` accepts.
        """
        try:
            other_id = other.id
        except AttributeError:
            return NotImplemented
        return self.id == other_id

    def __ne__(self, other):
        """Inverse of ``__eq__`` (Python 2 does not derive it automatically)."""
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        # Defining __eq__ removes the default hash on Python 3; hash on the
        # same key so entries remain usable in sets and as dict keys.
        return hash(self.id)

    def __cmp__(self, other):
        """Legacy comparison hook: falsy when ids match, truthy otherwise.

        Only consulted by Python 2; retained so existing callers keep working.
        """
        return not (self.id == other.id)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user