Extract atom from networking module to core
This commit is contained in:
parent
471feca8fb
commit
a4f4bb799c
2 changed files with 75 additions and 39 deletions
|
@ -15,7 +15,6 @@ nemubotversion = 3.4
|
||||||
|
|
||||||
from more import Response
|
from more import Response
|
||||||
|
|
||||||
from .atom import Atom
|
|
||||||
from . import page
|
from . import page
|
||||||
|
|
||||||
DATAS = None
|
DATAS = None
|
||||||
|
@ -154,17 +153,18 @@ def alert_change(content, site):
|
||||||
return
|
return
|
||||||
|
|
||||||
if site["type"] == "atom":
|
if site["type"] == "atom":
|
||||||
|
from nemubot.tools.feed import Feed
|
||||||
if site["_lastpage"] is None:
|
if site["_lastpage"] is None:
|
||||||
if site["lastcontent"] is None or site["lastcontent"] == "":
|
if site["lastcontent"] is None or site["lastcontent"] == "":
|
||||||
site["lastcontent"] = content
|
site["lastcontent"] = content
|
||||||
site["_lastpage"] = Atom(site["lastcontent"])
|
site["_lastpage"] = Feed(site["lastcontent"])
|
||||||
try:
|
try:
|
||||||
page = Atom(content)
|
page = Feed(content)
|
||||||
except:
|
except:
|
||||||
print("An error occurs during Atom parsing. Restart event...")
|
print("An error occurs during Atom parsing. Restart event...")
|
||||||
start_watching(site)
|
start_watching(site)
|
||||||
return
|
return
|
||||||
diff = site["_lastpage"].diff(page)
|
diff = site["_lastpage"] & page
|
||||||
if len(diff) > 0:
|
if len(diff) > 0:
|
||||||
site["_lastpage"] = page
|
site["_lastpage"] = page
|
||||||
diff.reverse()
|
diff.reverse()
|
||||||
|
|
|
@ -45,52 +45,88 @@ class AtomEntry:
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<AtomEntry title='%s' updated='%s'>" % (self.title, self.updated)
|
return "<AtomEntry title='%s' updated='%s'>" % (self.title, self.updated)
|
||||||
|
|
||||||
|
def __cmp__(self, other):
|
||||||
|
return not (self.id == other.id)
|
||||||
|
|
||||||
class Atom:
|
|
||||||
|
class RSSEntry:
    """One ``<item>`` element of an RSS feed.

    Attributes:
        id: Text of ``<guid>``; when the item has no guid, the link and then
            the title are used instead (RSS 2.0 makes ``<guid>`` optional).
        title: Text of ``<title>``, or ``""`` when missing or empty.
        pubDate: Raw text of ``<pubDate>`` (not parsed), or ``None``.
        summary: Text of ``<description>``, or ``None``.
        link: URL of the item, or ``None`` when there is no ``<link>``.
    """

    def __init__(self, node):
        """Build the entry from *node*, a DOM element for one ``<item>``."""
        self.title = self._text(node, "title") or ""
        self.pubDate = self._text(node, "pubDate")
        self.summary = self._text(node, "description")
        self.link = self._link(node)
        # Fall back to other stable fields so an item without <guid> does not
        # abort parsing of the whole feed.
        self.id = self._text(node, "guid") or self.link or self.title

    @staticmethod
    def _text(node, tag):
        """Return the first child's value of the first *tag* element, or None."""
        found = node.getElementsByTagName(tag)
        if not found or found[0].firstChild is None:
            return None
        return found[0].firstChild.nodeValue

    @staticmethod
    def _link(node):
        """Return the item URL from ``<link>``, or None when there is none."""
        found = node.getElementsByTagName("link")
        if not found:
            return None
        first = found[0]
        # RSS stores the URL as the element text; keep the ``href`` attribute
        # lookup as a fallback for feeds that use the Atom form.
        if first.firstChild is not None and first.firstChild.nodeValue:
            return first.firstChild.nodeValue.strip()
        return first.getAttribute("href")

    def __repr__(self):
        return "<RSSEntry title='%s' updated='%s'>" % (self.title, self.pubDate)

    def __eq__(self, other):
        """Two entries are equal when they carry the same id."""
        if not hasattr(other, "id"):
            return NotImplemented
        return self.id == other.id

    def __hash__(self):
        # Must agree with __eq__, which compares ids only.
        return hash(self.id)

    def __cmp__(self, other):
        # Ignored by Python 3 (see __eq__); kept for callers that still
        # invoke it explicitly.
        return not (self.id == other.id)
|
||||||
|
|
||||||
|
|
||||||
|
class Feed:
    """Parsed Atom or RSS document.

    Attributes:
        feed: Root DOM element of the parsed document.
        id: Text of the feed ``<id>`` (Atom only), otherwise ``None``.
        title: Text of the first ``<title>`` element, or ``None``.
        updated: Greatest ``updated`` value among entries that have one,
            or ``None``.
        entries: List of entry objects, in document order.
    """

    def __init__(self, string):
        """Parse *string* as XML and load it as an RSS or Atom feed.

        Raises:
            IRCException: when the root element is neither ``rss`` nor
                ``feed``.
        """
        self.feed = parseString(string).documentElement
        self.id = None
        self.title = None
        self.updated = None
        self.entries = []

        root = self.feed.tagName
        if root == "rss":
            self._parse_rss_feed()
        elif root == "feed":
            self._parse_atom_feed()
        else:
            from nemubot.exception import IRCException
            raise IRCException("This is not a valid Atom or RSS feed")

    def _first_text(self, tag):
        """Return the first child's value of the first *tag* element, or None."""
        found = self.feed.getElementsByTagName(tag)
        if not found or found[0].firstChild is None:
            return None
        return found[0].firstChild.nodeValue

    def _parse_atom_feed(self):
        """Fill id, title and entries from an Atom ``<feed>`` document."""
        self.id = self._first_text("id")
        self.title = self._first_text("title")

        for item in self.feed.getElementsByTagName("entry"):
            self._add_entry(AtomEntry(item))

    def _parse_rss_feed(self):
        """Fill title and entries from an ``<rss>`` document."""
        self.title = self._first_text("title")

        for item in self.feed.getElementsByTagName("item"):
            self._add_entry(RSSEntry(item))

    def _add_entry(self, entry):
        """Append *entry* (ignored when None) and track the newest ``updated``."""
        if entry is None:
            return
        self.entries.append(entry)
        # Entries without an ``updated`` attribute leave self.updated untouched.
        if hasattr(entry, "updated") and (self.updated is None or self.updated < entry.updated):
            self.updated = entry.updated

    @staticmethod
    def _entry_key(entry):
        """Return the value identifying *entry*: its id, else the object itself."""
        return getattr(entry, "id", entry)

    def __and__(self, b):
        """Return the entries present in only one of the two feeds.

        Entries are matched on their ``id`` rather than with ``in``: the
        entry classes only define ``__cmp__``, which Python 3 ignores, so
        object membership would report every entry as different.

        The result lists this feed's unmatched entries first, then *b*'s,
        each in document order.
        """
        key = self._entry_key
        mine = {key(e) for e in self.entries}
        theirs = {key(e) for e in b.entries}

        ret = [e for e in self.entries if key(e) not in theirs]
        ret.extend(e for e in b.entries if key(e) not in mine)

        # TODO: Sort by date

        return ret
|
Loading…
Add table
Add a link
Reference in a new issue