From 6d26a137528ba545d065a67ff0025d65e15644c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?N=C3=A9munaire?=
Date: Tue, 24 Jul 2012 17:56:20 +0200
Subject: [PATCH] Module DDG: add Wikipedia API

---
 modules/ddg/Wikipedia.py | 64 ++++++++++++++++++++++++++++++++++++++++++++
 modules/ddg/__init__.py  | 10 ++++++-
 2 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 modules/ddg/Wikipedia.py

diff --git a/modules/ddg/Wikipedia.py b/modules/ddg/Wikipedia.py
new file mode 100644
index 0000000..43900f8
--- /dev/null
+++ b/modules/ddg/Wikipedia.py
@@ -0,0 +1,64 @@
+# coding=utf-8
+
+import http.client
+import re
+import socket
+from urllib.parse import quote
+
+import module_states_file as xmlparser
+
+class Wikipedia:
+    def __init__(self, terms, lang="fr"):
+        self.terms = terms
+        self.lang = lang
+        self.curRT = -1
+        (res, page) = getPage(terms, self.lang)
+        if res == http.client.OK or res == http.client.SEE_OTHER:
+            self.wres = xmlparser.parse_string(page)
+        else:
+            self.wres = None
+
+    @property
+    def nextRes(self):
+        if self.wres is not None and self.wres.hasNode("query"):
+            if self.wres.getFirstNode("query").hasNode("pages"):
+                if self.wres.getFirstNode("query").getFirstNode("pages").hasNode("page"):
+                    if self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").hasNode("revisions"):
+                        self.curRT += 1
+                        content = self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").getFirstNode("revisions").getFirstNode("rev").getContent().split("\n")
+                        # Skip lines that are empty once the wiki markup is stripped
+                        while self.curRT < len(content) and striplink(content[self.curRT]).strip() == "":
+                            self.curRT += 1
+                        if self.curRT < len(content):
+                            return striplink(content[self.curRT])
+        return "No more results"
+
+
+def striplink(data):
+    # Drop HTML comments, templates and multi-part links, then unwrap
+    # [[target|label]] and [[target]] links.
+    p = re.compile(r'(<!--.*-->|\{\{.*\}\}|\[\[[^\]]+\|[^\]]+\|[^\]\|]+\]\])')
+    q = re.compile(r'\[\[([^\]]+)\|([^\]]+)]]')
+    r = re.compile(r'\[\[([^\]]+)\]\]')
+    (s, n) = p.subn('', data)
+    if s == "":
+        return s
+    (s, n) = q.subn(r"\1", s)
+    if s == "":
+        return s
+    (s, n) = r.subn(r"\1", s)
+    return s.replace("'''", "*")
+
+def getPage(terms, lang):
+    conn = http.client.HTTPConnection(lang + ".wikipedia.org")
+    try:
+        conn.request("GET", "/w/api.php?format=xml&redirects&action=query&prop=revisions&rvprop=content&rvsection=0&titles=%s" % quote(terms), None, {"User-agent": "Nemubot v3"})
+    except socket.gaierror:
+        print("Unable to fetch the page %s." % terms)
+        return (http.client.INTERNAL_SERVER_ERROR, None)
+
+    res = conn.getresponse()
+    data = res.read()
+
+    conn.close()
+    return (res.status, data)
diff --git a/modules/ddg/__init__.py b/modules/ddg/__init__.py
index 5708049..2a5385f 100644
--- a/modules/ddg/__init__.py
+++ b/modules/ddg/__init__.py
@@ -6,6 +6,7 @@ nemubotversion = 3.0
 
 from . import DDGSearch
 from . import WFASearch
+from . import Wikipedia
 
 lastSearch = dict()
 
@@ -16,6 +17,7 @@ def load():
 def reload():
     imp.reload(DDGSearch)
     imp.reload(WFASearch)
+    imp.reload(Wikipedia)
 
 def parseanswer(msg):
     global lastSearch
@@ -24,6 +26,10 @@ def parseanswer(msg):
         req = "def"
     elif msg.cmd[0] == "g" or msg.cmd[0] == "ddg" or msg.cmd[0] == "d":
         req = "link"
+    elif msg.cmd[0] == "w" or msg.cmd[0] == "wf" or msg.cmd[0] == "wfr":
+        req = "fr"
+    elif msg.cmd[0] == "we" or msg.cmd[0] == "wen":
+        req = "en"
     elif msg.cmd[0] == "wfa" or msg.cmd[0] == "calc" or msg.cmd[0] == "wa":
         req = "wfa"
 
@@ -40,8 +46,10 @@
         if not s.success:
             msg.send_chn(s.error)
             return True
-    else:
+    elif req == "link" or req == "def":
         s = DDGSearch.DDGSearch(' '.join(msg.cmd[1:]))
+    else:
+        s = Wikipedia.Wikipedia(' '.join(msg.cmd[1:]), req)
 
     if req == "def":
         msg.send_chn(s.definition)
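
Note, not part of the patch: the MediaWiki query that getPage() issues can be
exercised outside the bot with the standard library alone. A minimal sketch,
assuming the same endpoint and parameters as above; it swaps nemubot's
module_states_file parser for xml.etree, and uses HTTPSConnection because
wikipedia.org now redirects plain HTTP:

    # Standalone sketch (not part of the patch): same query as getPage(),
    # parsed with xml.etree instead of module_states_file.
    import http.client
    import xml.etree.ElementTree as ET
    from urllib.parse import quote

    def fetch_intro(terms, lang="fr"):
        conn = http.client.HTTPSConnection(lang + ".wikipedia.org")
        conn.request("GET", "/w/api.php?format=xml&redirects&action=query"
                     "&prop=revisions&rvprop=content&rvsection=0&titles=%s"
                     % quote(terms), None, {"User-agent": "Nemubot v3"})
        res = conn.getresponse()
        data = res.read()
        conn.close()
        if res.status != http.client.OK:
            return None
        # The section-0 wikitext sits under <query><pages><page><revisions><rev>.
        rev = ET.fromstring(data).find("query/pages/page/revisions/rev")
        return rev.text if rev is not None else None

    print(fetch_intro("Python (langage)"))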
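
A worked example of what striplink() then does to one line of that wikitext
(note the second regex keeps the link target, group \1, rather than the
displayed label):

    >>> striplink("'''[[Python (langage)|Python]]''' est un [[langage de programmation]] {{Infobox}}")
    '*Python (langage)* est un langage de programmation '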