From 6d26a137528ba545d065a67ff0025d65e15644c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?N=C3=A9munaire?=
Date: Tue, 24 Jul 2012 17:56:20 +0200
Subject: [PATCH] Module DDG: add Wikipedia API

---
 modules/ddg/Wikipedia.py | 64 ++++++++++++++++++++++++++++++++++++++++++++
 modules/ddg/__init__.py  | 10 ++++++-
 2 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 modules/ddg/Wikipedia.py

diff --git a/modules/ddg/Wikipedia.py b/modules/ddg/Wikipedia.py
new file mode 100644
index 0000000..43900f8
--- /dev/null
+++ b/modules/ddg/Wikipedia.py
@@ -0,0 +1,64 @@
+# coding=utf-8
+
+import http.client
+import re
+import socket
+from urllib.parse import quote
+
+import module_states_file as xmlparser
+
+class Wikipedia:
+    def __init__(self, terms, lang="fr"):
+        self.terms = terms
+        self.lang = lang
+        self.curRT = -1
+        (res, page) = getPage(terms, self.lang)
+        if res == http.client.OK or res == http.client.SEE_OTHER:
+            self.wres = xmlparser.parse_string(page)
+        else:
+            self.wres = None
+
+    @property
+    def nextRes(self):
+        if self.wres is not None and self.wres.hasNode("query"):
+            if self.wres.getFirstNode("query").hasNode("pages"):
+                if self.wres.getFirstNode("query").getFirstNode("pages").hasNode("page"):
+                    if self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").hasNode("revisions"):
+                        self.curRT += 1
+                        content = self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").getFirstNode("revisions").getFirstNode("rev").getContent().split("\n")
+                        # Skip lines that are empty once the wiki markup is stripped
+                        while self.curRT < len(content) and striplink(content[self.curRT]).strip() == "":
+                            self.curRT += 1
+                        if self.curRT < len(content):
+                            return striplink(content[self.curRT])
+        return "No more results"
+
+
+def striplink(data):
+    # Drop HTML comments, templates and multi-part links, then unwrap
+    # [[target|label]] and [[target]] links.
+    p = re.compile(r'(<!--.*-->|\{\{.*\}\}|\[\[[^\]]+\|[^\]]+\|[^\]\|]+\]\])')
+    q = re.compile(r'\[\[([^\]]+)\|([^\]]+)]]')
+    r = re.compile(r'\[\[([^\]]+)\]\]')
+    (s, n) = p.subn('', data)
+    if s == "":
+        return s
+    (s, n) = q.subn(r"\1", s)
+    if s == "":
+        return s
+    (s, n) = r.subn(r"\1", s)
+    return s.replace("'''", "*")
+
+def getPage(terms, lang):
+    conn = http.client.HTTPConnection(lang + ".wikipedia.org")
+    try:
+        conn.request("GET", "/w/api.php?format=xml&redirects&action=query&prop=revisions&rvprop=content&rvsection=0&titles=%s" % quote(terms), None, {"User-agent": "Nemubot v3"})
+    except socket.gaierror:
+        print("Unable to fetch the page %s." % terms)
+        return (http.client.INTERNAL_SERVER_ERROR, None)
+
+    res = conn.getresponse()
+    data = res.read()
+
+    conn.close()
+    return (res.status, data)
diff --git a/modules/ddg/__init__.py b/modules/ddg/__init__.py
index 5708049..2a5385f 100644
--- a/modules/ddg/__init__.py
+++ b/modules/ddg/__init__.py
@@ -6,6 +6,7 @@ nemubotversion = 3.0
 
 from . import DDGSearch
 from . import WFASearch
+from . import Wikipedia
 
 lastSearch = dict()
 
@@ -16,6 +17,7 @@ def load():
 def reload():
     imp.reload(DDGSearch)
     imp.reload(WFASearch)
+    imp.reload(Wikipedia)
 
 def parseanswer(msg):
     global lastSearch
@@ -24,6 +26,10 @@ def parseanswer(msg):
         req = "def"
     elif msg.cmd[0] == "g" or msg.cmd[0] == "ddg" or msg.cmd[0] == "d":
         req = "link"
+    elif msg.cmd[0] == "w" or msg.cmd[0] == "wf" or msg.cmd[0] == "wfr":
+        req = "fr"
+    elif msg.cmd[0] == "we" or msg.cmd[0] == "wen":
+        req = "en"
     elif msg.cmd[0] == "wfa" or msg.cmd[0] == "calc" or msg.cmd[0] == "wa":
         req = "wfa"
 
@@ -40,8 +46,10 @@
         if not s.success:
             msg.send_chn(s.error)
             return True
-    else:
+    elif req == "link" or req == "def":
         s = DDGSearch.DDGSearch(' '.join(msg.cmd[1:]))
+    else:
+        s = Wikipedia.Wikipedia(' '.join(msg.cmd[1:]), req)
 
     if req == "def":
         msg.send_chn(s.definition)
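
Note, not part of the patch: the MediaWiki query that getPage() issues can be
exercised outside the bot with the standard library alone. A minimal sketch,
assuming the same endpoint and parameters as above; it swaps nemubot's
module_states_file parser for xml.etree, and uses HTTPSConnection because
wikipedia.org now redirects plain HTTP:

    # Standalone sketch (not part of the patch): same query as getPage(),
    # parsed with xml.etree instead of module_states_file.
    import http.client
    import xml.etree.ElementTree as ET
    from urllib.parse import quote

    def fetch_intro(terms, lang="fr"):
        conn = http.client.HTTPSConnection(lang + ".wikipedia.org")
        conn.request("GET", "/w/api.php?format=xml&redirects&action=query"
                     "&prop=revisions&rvprop=content&rvsection=0&titles=%s"
                     % quote(terms), None, {"User-agent": "Nemubot v3"})
        res = conn.getresponse()
        data = res.read()
        conn.close()
        if res.status != http.client.OK:
            return None
        # The section-0 wikitext sits under <query><pages><page><revisions><rev>.
        rev = ET.fromstring(data).find("query/pages/page/revisions/rev")
        return rev.text if rev is not None else None

    print(fetch_intro("Python (langage)"))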
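
A worked example of what striplink() then does to one line of that wikitext
(note the second regex keeps the link target, group \1, rather than the
displayed label):

    >>> striplink("'''[[Python (langage)|Python]]''' est un [[langage de programmation]] {{Infobox}}")
    '*Python (langage)* est un langage de programmation '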