New module mediawiki

2014-09-02 21:19:08 +02:00 · 2014-09-02 21:19:08 +02:00 · 7387fabee1
commit 7387fabee1
parent 3bc53bb4ef
3 changed files with 92 additions and 107 deletions
--- a/modules/ddg/Wikipedia.py
+++ b/modules/ddg/Wikipedia.py
@ -1,56 +0,0 @@
-# coding=utf-8
-
-import re
-from urllib.parse import quote
-import urllib.request
-
-import xmlparser
-
-class Wikipedia:
-    def __init__(self, terms, lang="fr", site="wikipedia.org", section=0):
-        self.terms = terms
-        self.lang = lang
-        self.curRT = section
-
-        raw = urllib.request.urlopen(urllib.request.Request("http://" + self.lang + "." + site + "/w/api.php?format=xml&redirects&action=query&prop=revisions&rvprop=content&titles=%s" % (quote(terms)), headers={"User-agent": "Nemubot v3"}))
-        self.wres = xmlparser.parse_string(raw.read())
-        if self.wres is None or not (self.wres.hasNode("query") and self.wres.getFirstNode("query").hasNode("pages") and self.wres.getFirstNode("query").getFirstNode("pages").hasNode("page") and self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").hasNode("revisions")):
-            self.wres = None
-        else:
-            self.wres = self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").getFirstNode("revisions").getFirstNode("rev").getContent()
-            self.wres = striplink(self.wres)
-
-    @property
-    def nextRes(self):
-        if self.wres is not None:
-            for cnt in self.wres.split("\n"):
-                if self.curRT > 0:
-                    self.curRT -= 1
-                    continue
-
-                (c, u) = RGXP_s.subn(' ', cnt)
-                c = c.strip()
-                if c != "":
-                    yield c
-
-RGXP_p = re.compile(r"(<!--.*-->|<ref[^>]*/>|<ref[^>]*>[^>]*</ref>|<dfn[^>]*>[^>]*</dfn>|\{\{[^{}]*\}\}|\[\[([^\[\]]*\[\[[^\]\[]*\]\])+[^\[\]]*\]\]|\{\{([^{}]*\{\{[^{}]*\}\}[^{}]*)+\}\}|\{\{([^{}]*\{\{([^{}]*\{\{[^{}]*\}\}[^{}]*)+\}\}[^{}]*)+\}\}|\[\[[^\]|]+(\|[^\]\|]+)*\]\])|#\* ''" + "\n", re.I)
-RGXP_l = re.compile(r'\{\{(nobr|lang\|[^|}]+)\|([^}]+)\}\}', re.I)
-RGXP_m = re.compile(r'\{\{pron\|([^|}]+)\|[^}]+\}\}', re.I)
-RGXP_t = re.compile("==+ *([^=]+) *=+=\n+([^\n])", re.I)
-RGXP_q = re.compile(r'\[\[([^\[\]|]+)\|([^\]|]+)]]', re.I)
-RGXP_r = re.compile(r'\[\[([^\[\]|]+)\]\]', re.I)
-RGXP_s = re.compile(r'\s+')
-
-def striplink(s):
-    s.replace("{{m}}", "masculin").replace("{{f}}", "feminin").replace("{{n}}", "neutre")
-    (s, n) = RGXP_m.subn(r"[\1]", s)
-    (s, n) = RGXP_l.subn(r"\2", s)
-
-    (s, n) = RGXP_q.subn(r"\1", s)
-    (s, n) = RGXP_r.subn(r"\1", s)
-
-    (s, n) = RGXP_p.subn('', s)
-    if s == "": return s
-
-    (s, n) = RGXP_t.subn("\x03\x16" + r"\1" + " :\x03\x16 " + r"\2", s)
-    return s.replace("'''", "\x03\x02").replace("''", "\x03\x1f")
--- a/modules/ddg/init.py
+++ b/modules/ddg/init.py
@ -11,7 +11,6 @@ nemubotversion = 3.4
 from . import DDGSearch
 from . import UrbanDictionnary
 from . import WFASearch
-from . import Wikipedia

 def load(context):
    global CONF
@ -98,53 +97,3 @@ def calculate(msg):
        return res
    else:
        return Response(msg.sender, s.error, msg.channel)
-
-
-@hook("cmd_hook", "wikipedia")
-def wikipedia(msg):
-    return wiki("wikipedia.org", 0, msg)
-
-@hook("cmd_hook", "wiktionary")
-def wiktionary(msg):
-    return wiki("wiktionary.org", 1, msg)
-
-@hook("cmd_hook", "etymology")
-def wiktionary(msg):
-    return wiki("wiktionary.org", 0, msg)
-
-def wiki(site, section, msg):
-    if len(msg.cmds) <= 1:
-        return Response(msg.sender,
-                        "Indicate a term to search",
-                        msg.channel, nick=msg.nick)
-    if len(msg.cmds) > 2 and len(msg.cmds[1]) < 4:
-        lang = msg.cmds[1]
-        extract = 2
-    else:
-        lang = "fr"
-        extract = 1
-
-    s = Wikipedia.Wikipedia(' '.join(msg.cmds[extract:]), lang, site, section)
-
-    res = Response(msg.sender, channel=msg.channel, nomore="No more results")
-    if site == "wiktionary.org":
-        tout = [result for result in s.nextRes if result.find("\x03\x16 :\x03\x16 ") != 0]
-        if len(tout) > 0:
-            defI=1
-            for t in tout:
-                if t.find("# ") == 0:
-                    t = t.replace("# ", "%d. " % defI)
-                    defI += 1
-                elif t.find("#* ") == 0:
-                    t = t.replace("#* ", "  * ")
-                res.append_message(t)
-    else:
-        for result in s.nextRes:
-            res.append_message(result)
-
-    if len(res.messages) > 0:
-        return res
-    else:
-        return Response(msg.sender,
-                        "No information about " + " ".join(msg.cmds[extract:]),
-                        msg.channel)