Extract atom from networking module to core

This commit is contained in:
nemunaire 2015-09-27 15:13:18 +02:00
parent 471feca8fb
commit a4f4bb799c
2 changed files with 75 additions and 39 deletions

View File

@ -15,7 +15,6 @@ nemubotversion = 3.4
from more import Response
from .atom import Atom
from . import page
DATAS = None
@ -154,17 +153,18 @@ def alert_change(content, site):
return
if site["type"] == "atom":
from nemubot.tools.feed import Feed
if site["_lastpage"] is None:
if site["lastcontent"] is None or site["lastcontent"] == "":
site["lastcontent"] = content
site["_lastpage"] = Atom(site["lastcontent"])
site["_lastpage"] = Feed(site["lastcontent"])
try:
page = Atom(content)
page = Feed(content)
except:
print("An error occurs during Atom parsing. Restart event...")
start_watching(site)
return
diff = site["_lastpage"].diff(page)
diff = site["_lastpage"] & page
if len(diff) > 0:
site["_lastpage"] = page
diff.reverse()

View File

@ -45,52 +45,88 @@ class AtomEntry:
def __repr__(self):
    """Return a short debugging representation of this entry.

    The text shows the entry's ``title`` and ``updated`` attributes.
    """
    fields = (self.title, self.updated)
    return "<AtomEntry title='%s' updated='%s'>" % fields
def __cmp__(self, other):
    """Legacy comparison hook: falsy when both entries share the same id.

    Python 3 never calls ``__cmp__``; it is kept only so that code calling
    it explicitly keeps working. Equality is provided by ``__eq__``.
    """
    return not (self.id == other.id)

def __eq__(self, other):
    """Two entries are equal when their ``id`` attributes are equal.

    This is what makes ``entry in some_list`` and ``==`` compare by id
    instead of by object identity.
    """
    if not hasattr(other, "id"):
        return NotImplemented
    return self.id == other.id

def __hash__(self):
    """Hash on ``id`` so that equal entries hash alike."""
    return hash(self.id)
class Atom:
class RSSEntry:
    """One ``<item>`` of an RSS feed, built from its DOM element.

    Attributes set by the constructor:
        id: text of ``<guid>``; when the item has none, the link, and
            failing that the title, is used instead.
        title: text of ``<title>``, or ``""`` when missing or empty.
        pubDate: text of ``<pubDate>``, or ``None`` when missing.
        summary: text of ``<description>``, or ``None`` when missing.
        link: text of ``<link>`` (or its ``href`` attribute), or ``None``.
    """

    def __init__(self, node):
        """Read the item's fields from *node*, a DOM element."""
        self.title = self._first_text(node, "title") or ""
        self.pubDate = self._first_text(node, "pubDate")
        self.summary = self._first_text(node, "description")

        # RSS stores the URL as the text of <link>. The ``href`` attribute
        # (the only thing read previously) is still honoured as a fallback.
        self.link = self._first_text(node, "link")
        if self.link is None:
            links = node.getElementsByTagName("link")
            if len(links) > 0:
                self.link = links[0].getAttribute("href") or None

        # <guid> is optional in RSS, so fall back to other identifying data
        # rather than raising on items that omit it.
        self.id = self._first_text(node, "guid") or self.link or self.title

    @staticmethod
    def _first_text(node, tag):
        """Return the value of the first child of the first *tag* element.

        Returns ``None`` when *node* has no such element or it is empty.
        """
        found = node.getElementsByTagName(tag)
        if len(found) == 0 or found[0].firstChild is None:
            return None
        return found[0].firstChild.nodeValue

    def __repr__(self):
        return "<RSSEntry title='%s' updated='%s'>" % (self.title, self.pubDate)

    def __cmp__(self, other):
        """Legacy hook, never called by Python 3; see ``__eq__``."""
        return not (self.id == other.id)

    def __eq__(self, other):
        """Entries are equal when their ``id`` attributes are equal."""
        if not hasattr(other, "id"):
            return NotImplemented
        return self.id == other.id

    def __hash__(self):
        """Hash on ``id`` so that equal entries hash alike."""
        return hash(self.id)
class Feed:
    """A parsed Atom or RSS document.

    Attributes set by the constructor:
        raw: the string given to the constructor.
        feed: the document's root DOM element.
        id: text of the feed's ``<id>`` (Atom only), else ``None``.
        title: text of the first ``<title>`` element, or ``None``.
        updated: greatest ``updated`` value among the entries that have
            one, or ``None``.
        entries: list of entry objects, in document order.
    """

    def __init__(self, string):
        """Parse *string* as an RSS (``<rss>``) or Atom (``<feed>``) document.

        Raises:
            IRCException: when the root element is neither ``rss`` nor
                ``feed``.
        """
        self.raw = string
        self.feed = parseString(string).documentElement
        self.id = None
        self.title = None
        self.updated = None
        self.entries = list()

        if self.feed.tagName == "rss":
            self._parse_rss_feed()
        elif self.feed.tagName == "feed":
            self._parse_atom_feed()
        else:
            from nemubot.exception import IRCException
            raise IRCException("This is not a valid Atom or RSS feed")

    @staticmethod
    def _first_text(node, tag):
        """Return the value of the first child of the first *tag* element.

        Returns ``None`` when *node* has no such element or it is empty.
        """
        found = node.getElementsByTagName(tag)
        if len(found) == 0 or found[0].firstChild is None:
            return None
        return found[0].firstChild.nodeValue

    def _parse_atom_feed(self):
        """Fill ``id``, ``title`` and ``entries`` from an Atom document."""
        self.id = self._first_text(self.feed, "id")
        self.title = self._first_text(self.feed, "title")

        for item in self.feed.getElementsByTagName("entry"):
            self._add_entry(AtomEntry(item))

    def _parse_rss_feed(self):
        """Fill ``title`` and ``entries`` from an RSS document."""
        self.title = self._first_text(self.feed, "title")

        for item in self.feed.getElementsByTagName("item"):
            self._add_entry(RSSEntry(item))

    def _add_entry(self, entry):
        """Append *entry* (ignored when ``None``) and track the newest stamp."""
        if entry is not None:
            self.entries.append(entry)
            if hasattr(entry, "updated") and (self.updated is None or self.updated < entry.updated):
                self.updated = entry.updated

    def __str__(self):
        return self.raw

    def diff(self, other):
        """Return the entries of *other* whose id is unknown to this feed.

        Entries carrying an ``updated`` value older than this feed's
        ``updated`` are left out. When this feed has no ``updated`` yet, it
        takes the value of the first unknown entry, as the previous
        implementation of this method did.
        """
        known = {entry.id for entry in self.entries}
        differ = list()
        for entry in other.entries:
            if entry.id in known:
                continue
            stamp = getattr(entry, "updated", None)
            if self.updated is None:
                self.updated = stamp
            if stamp is None or stamp >= self.updated:
                differ.append(entry)
        return differ

    def get_ordered_entries(self):
        """Return the entries sorted by ``updated``, newest first.

        Entries without an ``updated`` attribute sort last.
        """
        return sorted(self.entries,
                      key=lambda e: getattr(e, "updated", None) or "",
                      reverse=True)

    def __and__(self, b):
        """Return the entries present in only one of the two feeds.

        Entries are matched on their ``id`` attribute, so the result does
        not depend on how the entry classes implement equality. Entries
        found only in this feed come first, then those only in *b*; each
        group keeps its document order.
        """
        own_ids = {e.id for e in self.entries}
        other_ids = {e.id for e in b.entries}

        ret = [e for e in self.entries if e.id not in other_ids]
        ret.extend(e for e in b.entries if e.id not in own_ids)

        # TODO: Sort by date

        return ret


if __name__ == "__main__":
    # Manual check: compare two saved snapshots of the same feed.
    with open("rss.php.1", "r") as f:
        content1 = f.read()

    with open("rss.php", "r") as f:
        content2 = f.read()

    a = Feed(content1)
    print(a.updated)
    b = Feed(content2)
    print(b.updated)

    diff = a.diff(b)
    print(diff)