Extract atom from networking module to core

2015-09-27 15:13:18 +02:00 · 2015-09-27 15:13:18 +02:00 · a4f4bb799c
commit a4f4bb799c
parent 471feca8fb
2 changed files with 79 additions and 43 deletions
--- a/modules/networking/watchWebsite.py
+++ b/modules/networking/watchWebsite.py
@@ -15,7 +15,6 @@ nemubotversion = 3.4

 from more import Response

-from .atom import Atom
 from . import page

 DATAS = None
@@ -154,17 +153,18 @@ def alert_change(content, site):
        return

    if site["type"] == "atom":
+        from nemubot.tools.feed import Feed
        if site["_lastpage"] is None:
            if site["lastcontent"] is None or site["lastcontent"] == "":
                site["lastcontent"] = content
-            site["_lastpage"] = Atom(site["lastcontent"])
+            site["_lastpage"] = Feed(site["lastcontent"])
        try:
-            page = Atom(content)
+            page = Feed(content)
        except:
            print("An error occurs during Atom parsing. Restart event...")
            start_watching(site)
            return
-        diff = site["_lastpage"].diff(page)
+        diff = site["_lastpage"] & page
        if len(diff) > 0:
            site["_lastpage"] = page
            diff.reverse()
--- a/modules/networking/atom.py
+++ b/modules/networking/atom.py
@@ -45,52 +45,88 @@ class AtomEntry:
    def __repr__(self):
+        """Return a short debug representation built from the title and update date."""
        return "<AtomEntry title='%s' updated='%s'>" % (self.title, self.updated)

+    def __cmp__(self, other):
+        """Legacy comparison hook: falsy (equal) when both entries share an id.
+
+        Only Python 2 calls this method; Python 3 ignores it, which is why
+        the rich comparison methods below are defined as well.
+        """
+        return not (self.id == other.id)
+
+    def __eq__(self, other):
+        """Two entries are equal when they carry the same id."""
+        if not hasattr(other, "id"):
+            return NotImplemented
+        return self.id == other.id
+
+    def __ne__(self, other):
+        """Inverse of ``__eq__`` (spelled out because Python 2 does not derive it)."""
+        equal = self.__eq__(other)
+        if equal is NotImplemented:
+            return equal
+        return not equal
+
+    def __hash__(self):
+        """Hash on the id so that the hash stays consistent with ``__eq__``."""
+        return hash(self.id)

-class Atom:
+
+class RSSEntry:
+    """One ``<item>`` element of an RSS feed.
+
+    Attributes set by the constructor:
+      id       -- text of <guid>, falling back to the link, then the title
+      title    -- text of <title>, or "" when absent or empty
+      pubDate  -- text of <pubDate>, or None when absent or empty
+      summary  -- text of <description>, or None when absent or empty
+      link     -- URL of the item, or None when there is no <link>
+    """
+
+    def __init__(self, node):
+        """Read the fields listed above from *node*, a DOM ``<item>`` element."""
+        # Every child of an RSS <item> is optional, so nothing here may
+        # assume that a tag exists or has a text child.
+        self.title = self._text(node, "title") or ""
+        self.pubDate = self._text(node, "pubDate")
+        self.summary = self._text(node, "description")
+
+        # RSS stores the URL as the text of <link>; the href attribute is
+        # only kept as a fallback for Atom-style link elements.
+        links = node.getElementsByTagName("link")
+        if len(links) == 0:
+            self.link = None
+        elif links[0].firstChild is not None:
+            self.link = links[0].firstChild.nodeValue
+        else:
+            self.link = links[0].getAttribute("href")
+
+        # Without a <guid>, the link (then the title) is the best identity left.
+        self.id = self._text(node, "guid") or self.link or self.title
+
+    @staticmethod
+    def _text(node, tag):
+        """Return the text of the first *tag* descendant of *node*, or None."""
+        found = node.getElementsByTagName(tag)
+        if len(found) > 0 and found[0].firstChild is not None:
+            return found[0].firstChild.nodeValue
+        return None
+
+    def __repr__(self):
+        return "<RSSEntry title='%s' updated='%s'>" % (self.title, self.pubDate)
+
+    def __cmp__(self, other):
+        """Legacy Python 2 hook, kept for compatibility; Python 3 ignores it."""
+        return not (self.id == other.id)
+
+    def __eq__(self, other):
+        """Two entries are equal when they carry the same id."""
+        if not hasattr(other, "id"):
+            return NotImplemented
+        return self.id == other.id
+
+    def __ne__(self, other):
+        """Inverse of ``__eq__`` (spelled out because Python 2 does not derive it)."""
+        equal = self.__eq__(other)
+        if equal is NotImplemented:
+            return equal
+        return not equal
+
+    def __hash__(self):
+        """Hash on the id so that the hash stays consistent with ``__eq__``."""
+        return hash(self.id)
+
+
+class Feed:

    def __init__(self, string):
+        """Parse *string* with parseString and load the resulting document.
+
+        The root tag name selects the loader: ``rss`` or ``feed`` (Atom).
+        Any other root raises IRCException.
+        """
-        self.raw = string
        self.feed = parseString(string).documentElement
+        self.id = None
+        self.title = None
+        self.updated = None
+        self.entries = list()
+
+        if self.feed.tagName == "rss":
+            self._parse_rss_feed()
+        elif self.feed.tagName == "feed":
+            self._parse_atom_feed()
+        else:
+            from nemubot.exception import IRCException
+            raise IRCException("This is not a valid Atom or RSS feed")
+
+
+    def _parse_atom_feed(self):
+        """Read the feed id, its title and every ``<entry>`` of an Atom document."""
        self.id = self.feed.getElementsByTagName("id")[0].firstChild.nodeValue
        self.title = self.feed.getElementsByTagName("title")[0].firstChild.nodeValue

-        self.updated = None
-        self.entries = dict()
        for item in self.feed.getElementsByTagName("entry"):
-            entry = AtomEntry(item)
-            self.entries[entry.id] = entry
-            if self.updated is None or self.updated < entry.updated:
+            self._add_entry(AtomEntry(item))
+
+
+    def _parse_rss_feed(self):
+        """Read the title and every ``<item>`` of an RSS document."""
+        first_title = self.feed.getElementsByTagName("title")[0]
+        self.title = first_title.firstChild.nodeValue
+
+        for rss_item in self.feed.getElementsByTagName("item"):
+            self._add_entry(RSSEntry(rss_item))
+
+
+    def _add_entry(self, entry):
+        """Append *entry* (ignored when None) and keep the greatest ``updated`` value seen."""
+        if entry is not None:
+            self.entries.append(entry)
+            # RSSEntry sets pubDate rather than updated, hence the hasattr guard.
+            if hasattr(entry, "updated") and (self.updated is None or self.updated < entry.updated):
                self.updated = entry.updated

-    def __str__(self):
-        return self.raw

-    def diff(self, other):
-        differ = list()
-        for k in other.entries.keys():
-            if self.updated is None and k not in self.entries:
-                self.updated = other.entries[k].updated
-            if k not in self.entries and other.entries[k].updated >= self.updated:
-                differ.append(other.entries[k])
-        return differ
+    def __and__(self, b):
+        """Return the entries that appear in only one of the two feeds.
+
+        Entries are matched on their ``id``. The result lists the entries
+        found only in this feed, then those found only in *b*, each group
+        in feed order.
+        """
+        # Match on ids explicitly: Python 3 never calls the entries'
+        # __cmp__, so ``in`` on the entry lists would compare by identity
+        # and report every entry as changed.
+        own_ids = set(e.id for e in self.entries)
+        other_ids = set(e.id for e in b.entries)

-    def get_ordered_entries(self):
-        entries = self.entries.values()
-        return sorted(entries, key=lambda e: e.updated, reverse=True)
+        ret = [e for e in self.entries if e.id not in other_ids]

-if __name__ == "__main__":
-  content1 = ""
-  with open("rss.php.1", "r") as f:
-    for line in f:
-      content1 += line
-  content2 = ""
-  with open("rss.php", "r") as f:
-    for line in f:
-      content2 += line
-  a = Atom(content1)
-  print(a.updated)
-  b = Atom(content2)
-  print(b.updated)
+        ret.extend(e for e in b.entries if e.id not in own_ids)

-  diff = a.diff(b)
-  print(diff)
+        # TODO: Sort by date
+
+        return ret