Extract atom from networking module to core
This commit is contained in:
parent
471feca8fb
commit
a4f4bb799c
|
@ -15,7 +15,6 @@ nemubotversion = 3.4
|
|||
|
||||
from more import Response
|
||||
|
||||
from .atom import Atom
|
||||
from . import page
|
||||
|
||||
DATAS = None
|
||||
|
@ -154,17 +153,18 @@ def alert_change(content, site):
|
|||
return
|
||||
|
||||
if site["type"] == "atom":
|
||||
from nemubot.tools.feed import Feed
|
||||
if site["_lastpage"] is None:
|
||||
if site["lastcontent"] is None or site["lastcontent"] == "":
|
||||
site["lastcontent"] = content
|
||||
site["_lastpage"] = Atom(site["lastcontent"])
|
||||
site["_lastpage"] = Feed(site["lastcontent"])
|
||||
try:
|
||||
page = Atom(content)
|
||||
page = Feed(content)
|
||||
except:
|
||||
print("An error occurs during Atom parsing. Restart event...")
|
||||
start_watching(site)
|
||||
return
|
||||
diff = site["_lastpage"].diff(page)
|
||||
diff = site["_lastpage"] & page
|
||||
if len(diff) > 0:
|
||||
site["_lastpage"] = page
|
||||
diff.reverse()
|
||||
|
|
|
@ -45,52 +45,88 @@ class AtomEntry:
|
|||
def __repr__(self):
|
||||
return "<AtomEntry title='%s' updated='%s'>" % (self.title, self.updated)
|
||||
|
||||
def __cmp__(self, other):
|
||||
return not (self.id == other.id)
|
||||
|
||||
class Atom:
|
||||
|
||||
class RSSEntry:
|
||||
|
||||
def __init__(self, node):
|
||||
self.id = node.getElementsByTagName("guid")[0].firstChild.nodeValue
|
||||
if node.getElementsByTagName("title")[0].firstChild is not None:
|
||||
self.title = node.getElementsByTagName("title")[0].firstChild.nodeValue
|
||||
else:
|
||||
self.title = ""
|
||||
|
||||
self.pubDate = node.getElementsByTagName("pubDate")[0].firstChild.nodeValue
|
||||
|
||||
if len(node.getElementsByTagName("description")) > 0 and node.getElementsByTagName("description")[0].firstChild is not None:
|
||||
self.summary = node.getElementsByTagName("description")[0].firstChild.nodeValue
|
||||
else:
|
||||
self.summary = None
|
||||
if len(node.getElementsByTagName("link")) > 0:
|
||||
self.link = node.getElementsByTagName("link")[0].getAttribute("href")
|
||||
else:
|
||||
self.link = None
|
||||
|
||||
def __repr__(self):
|
||||
return "<RSSEntry title='%s' updated='%s'>" % (self.title, self.pubDate)
|
||||
|
||||
def __cmp__(self, other):
|
||||
return not (self.id == other.id)
|
||||
|
||||
|
||||
class Feed:
|
||||
|
||||
def __init__(self, string):
|
||||
self.raw = string
|
||||
self.feed = parseString(string).documentElement
|
||||
self.id = None
|
||||
self.title = None
|
||||
self.updated = None
|
||||
self.entries = list()
|
||||
|
||||
if self.feed.tagName == "rss":
|
||||
self._parse_rss_feed()
|
||||
elif self.feed.tagName == "feed":
|
||||
self._parse_atom_feed()
|
||||
else:
|
||||
from nemubot.exception import IRCException
|
||||
raise IRCException("This is not a valid Atom or RSS feed")
|
||||
|
||||
|
||||
def _parse_atom_feed(self):
|
||||
self.id = self.feed.getElementsByTagName("id")[0].firstChild.nodeValue
|
||||
self.title = self.feed.getElementsByTagName("title")[0].firstChild.nodeValue
|
||||
|
||||
self.updated = None
|
||||
self.entries = dict()
|
||||
for item in self.feed.getElementsByTagName("entry"):
|
||||
entry = AtomEntry(item)
|
||||
self.entries[entry.id] = entry
|
||||
if self.updated is None or self.updated < entry.updated:
|
||||
self._add_entry(AtomEntry(item))
|
||||
|
||||
|
||||
def _parse_rss_feed(self):
|
||||
self.title = self.feed.getElementsByTagName("title")[0].firstChild.nodeValue
|
||||
|
||||
for item in self.feed.getElementsByTagName("item"):
|
||||
self._add_entry(RSSEntry(item))
|
||||
|
||||
|
||||
def _add_entry(self, entry):
|
||||
if entry is not None:
|
||||
self.entries.append(entry)
|
||||
if hasattr(entry, "updated") and (self.updated is None or self.updated < entry.updated):
|
||||
self.updated = entry.updated
|
||||
|
||||
def __str__(self):
|
||||
return self.raw
|
||||
|
||||
def diff(self, other):
|
||||
differ = list()
|
||||
for k in other.entries.keys():
|
||||
if self.updated is None and k not in self.entries:
|
||||
self.updated = other.entries[k].updated
|
||||
if k not in self.entries and other.entries[k].updated >= self.updated:
|
||||
differ.append(other.entries[k])
|
||||
return differ
|
||||
def __and__(self, b):
|
||||
ret = []
|
||||
|
||||
def get_ordered_entries(self):
|
||||
entries = self.entries.values()
|
||||
return sorted(entries, key=lambda e: e.updated, reverse=True)
|
||||
for e in self.entries:
|
||||
if e not in b.entries:
|
||||
ret.append(e)
|
||||
|
||||
if __name__ == "__main__":
|
||||
content1 = ""
|
||||
with open("rss.php.1", "r") as f:
|
||||
for line in f:
|
||||
content1 += line
|
||||
content2 = ""
|
||||
with open("rss.php", "r") as f:
|
||||
for line in f:
|
||||
content2 += line
|
||||
a = Atom(content1)
|
||||
print(a.updated)
|
||||
b = Atom(content2)
|
||||
print(b.updated)
|
||||
for e in b.entries:
|
||||
if e not in self.entries:
|
||||
ret.append(e)
|
||||
|
||||
diff = a.diff(b)
|
||||
print(diff)
|
||||
# TODO: Sort by date
|
||||
|
||||
return ret
|
Loading…
Reference in New Issue
Block a user