Use urllib.request in DDG module; fix issue #1: better output for Wiki
This commit is contained in:
parent
6f6ddd4d1e
commit
c3c697bdab
@ -1,13 +1,17 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
|
import xmlparser
|
||||||
from tools import web
|
from tools import web
|
||||||
|
|
||||||
class DDGSearch:
|
class DDGSearch:
|
||||||
def __init__(self, terms):
|
def __init__(self, terms):
|
||||||
self.terms = terms
|
self.terms = terms
|
||||||
self.ddgres = web.getXML("http://api.duckduckgo.com/?q=%s&format=xml" % quote(terms))
|
|
||||||
|
raw = urlopen("https://api.duckduckgo.com/?q=%s&format=xml" % quote(terms), timeout=10)
|
||||||
|
self.ddgres = xmlparser.parse_string(raw.read())
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def type(self):
|
def type(self):
|
||||||
|
@ -1,17 +1,19 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
from tools import web
|
import xmlparser
|
||||||
|
|
||||||
class WFASearch:
|
class WFASearch:
|
||||||
def __init__(self, terms):
|
def __init__(self, terms):
|
||||||
self.terms = terms
|
self.terms = terms
|
||||||
try:
|
try:
|
||||||
self.wfares = web.getXML("http://api.wolframalpha.com/v2/query?"
|
raw = urlopen("http://api.wolframalpha.com/v2/query?"
|
||||||
"input=%s&appid=%s"
|
"input=%s&appid=%s"
|
||||||
% (quote(terms),
|
% (quote(terms),
|
||||||
CONF.getNode("wfaapi")["key"]))
|
CONF.getNode("wfaapi")["key"]), timeout=15)
|
||||||
|
self.wfares = xmlparser.parse_string(raw.read())
|
||||||
except (TypeError, KeyError):
|
except (TypeError, KeyError):
|
||||||
print ("You need a Wolfram|Alpha API key in order to use this "
|
print ("You need a Wolfram|Alpha API key in order to use this "
|
||||||
"module. Add it to the module configuration file:\n<wfaapi"
|
"module. Add it to the module configuration file:\n<wfaapi"
|
||||||
|
@ -1,76 +1,55 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
import http.client
|
|
||||||
import re
|
import re
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
import xmlparser
|
import xmlparser
|
||||||
|
|
||||||
class Wikipedia:
|
class Wikipedia:
|
||||||
def __init__(self, terms, lang="fr"):
|
def __init__(self, terms, lang="fr", site="wikipedia.org", section=0):
|
||||||
self.terms = terms
|
self.terms = terms
|
||||||
self.lang = lang
|
self.lang = lang
|
||||||
self.curRT = 0
|
self.curRT = 0
|
||||||
(res, page) = getPage(terms, self.lang)
|
|
||||||
if res == http.client.OK or res == http.client.SEE_OTHER:
|
raw = urllib.request.urlopen(urllib.request.Request("http://" + self.lang + "." + site + "/w/api.php?format=xml&redirects&action=query&prop=revisions&rvprop=content&titles=%s" % (quote(terms)), headers={"User-agent": "Nemubot v3"}))
|
||||||
self.wres = xmlparser.parse_string(page)
|
self.wres = xmlparser.parse_string(raw.read())
|
||||||
if self.wres is None or not (self.wres.hasNode("query") and self.wres.getFirstNode("query").hasNode("pages") and self.wres.getFirstNode("query").getFirstNode("pages").hasNode("page") and self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").hasNode("revisions")):
|
if self.wres is None or not (self.wres.hasNode("query") and self.wres.getFirstNode("query").hasNode("pages") and self.wres.getFirstNode("query").getFirstNode("pages").hasNode("page") and self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").hasNode("revisions")):
|
||||||
self.wres = None
|
self.wres = None
|
||||||
else:
|
else:
|
||||||
self.infobox = parseInfobox(self)
|
self.wres = self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").getFirstNode("revisions").getFirstNode("rev").getContent()
|
||||||
else:
|
self.wres = striplink(self.wres)
|
||||||
self.wres = None
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def nextRes(self):
|
def nextRes(self):
|
||||||
if self.wres is not None:
|
if self.wres is not None:
|
||||||
for cnt in self.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").getFirstNode("revisions").getFirstNode("rev").getContent().split("\n"):
|
for cnt in self.wres.split("\n"):
|
||||||
if self.curRT > 0:
|
if self.curRT > 0:
|
||||||
self.curRT -= 1
|
self.curRT -= 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
c = striplink(cnt).strip()
|
(c, u) = RGXP_s.subn(' ', cnt)
|
||||||
|
c = c.strip()
|
||||||
if c != "":
|
if c != "":
|
||||||
yield c
|
yield c
|
||||||
|
|
||||||
|
RGXP_p = re.compile(r'(<!--.*-->|<ref[^>]*/>|<ref[^>]*>[^>]*</ref>|<dfn[^>]*>[^>]*</dfn>|\{\{[^}]*\}\}|\[\[([^\[\]]*\[\[[^\]\[]*\]\])+[^\[\]]*\]\]|\{\{([^{}]*\{\{.*\}\}[^{}]*)+\}\}|\[\[[^\]|]+(\|[^\]\|]+)*\]\])', re.I)
|
||||||
|
RGXP_l = re.compile(r'\{\{(nobr|lang\|[^|}]+)\|([^}]+)\}\}', re.I)
|
||||||
|
RGXP_t = re.compile("==+ *([^=]+) *=+=\n+([^\n])", re.I)
|
||||||
|
RGXP_q = re.compile(r'\[\[([^\[\]|]+)\|([^\]|]+)]]', re.I)
|
||||||
|
RGXP_r = re.compile(r'\[\[([^\[\]|]+)\]\]', re.I)
|
||||||
|
RGXP_s = re.compile(r'\s+')
|
||||||
|
|
||||||
def parseInfobox(w):
|
def striplink(s):
|
||||||
inInfobox = False
|
(s, n) = RGXP_l.subn(r"\2", s)
|
||||||
view=-1
|
if s == "": return s
|
||||||
for cnt in w.wres.getFirstNode("query").getFirstNode("pages").getFirstNode("page").getFirstNode("revisions").getFirstNode("rev").getContent().split("\n"):
|
|
||||||
view += 1
|
|
||||||
if inInfobox:
|
|
||||||
if cnt.find("}}") == 0:
|
|
||||||
inInfobox=False
|
|
||||||
elif cnt.find("{{") == 0:
|
|
||||||
inInfobox=True
|
|
||||||
else:
|
|
||||||
w.curRT += view
|
|
||||||
break
|
|
||||||
|
|
||||||
def striplink(data):
|
(s, n) = RGXP_q.subn(r"\1", s)
|
||||||
p = re.compile(r'(<!--.*-->|\{\{.*\}\}|\[\[[^\]]+\|[^\]]+\|[^\]\|]+\]\])')
|
if s == "": return s
|
||||||
q = re.compile(r'\[\[([^\]]+)\|([^\]]+)]]')
|
|
||||||
r = re.compile(r'\[\[([^\]]+)\]\]')
|
|
||||||
(s, n) = p.subn('', data)
|
|
||||||
if s == "":
|
|
||||||
return s
|
|
||||||
(s, n) = q.subn(r"\1", s)
|
|
||||||
if s == "":
|
|
||||||
return s
|
|
||||||
(s, n) = r.subn(r"\1", s)
|
|
||||||
return s.replace("'''", "*")
|
|
||||||
|
|
||||||
def getPage(terms, lang, site="wikipedia"):
|
(s, n) = RGXP_r.subn(r"\1", s)
|
||||||
conn = http.client.HTTPConnection(lang + "." + site + ".org", timeout=5)
|
if s == "": return s
|
||||||
try:
|
|
||||||
conn.request("GET", "/w/api.php?format=xml&redirects&action=query&prop=revisions&rvprop=content&rvsection=0&titles=%s" % quote(terms), None, {"User-agent": "Nemubot v3"})
|
|
||||||
except socket.gaierror:
|
|
||||||
print ("impossible de récupérer la page %s."%(p))
|
|
||||||
return (http.client.INTERNAL_SERVER_ERROR, None)
|
|
||||||
|
|
||||||
res = conn.getresponse()
|
(s, n) = RGXP_p.subn('', s)
|
||||||
data = res.read()
|
(s, n) = RGXP_t.subn("\x03\x16" + r"\1" + " :\x03\x16 " + r"\2", s)
|
||||||
|
return s.replace("'''", "\x03\x02").replace("''", "\x03\x1f")
|
||||||
conn.close()
|
|
||||||
return (res.status, data)
|
|
||||||
|
@ -25,6 +25,7 @@ def load(context):
|
|||||||
add_hook("cmd_hook", Hook(calculate, "wa"))
|
add_hook("cmd_hook", Hook(calculate, "wa"))
|
||||||
add_hook("cmd_hook", Hook(calculate, "wfa"))
|
add_hook("cmd_hook", Hook(calculate, "wfa"))
|
||||||
add_hook("cmd_hook", Hook(calculate, "calc"))
|
add_hook("cmd_hook", Hook(calculate, "calc"))
|
||||||
|
add_hook("cmd_hook", Hook(wiki, "dico"))
|
||||||
add_hook("cmd_hook", Hook(wiki, "w"))
|
add_hook("cmd_hook", Hook(wiki, "w"))
|
||||||
add_hook("cmd_hook", Hook(wiki, "wf"))
|
add_hook("cmd_hook", Hook(wiki, "wf"))
|
||||||
add_hook("cmd_hook", Hook(wiki, "wfr"))
|
add_hook("cmd_hook", Hook(wiki, "wfr"))
|
||||||
@ -98,12 +99,20 @@ def wiki(msg):
|
|||||||
return Response(msg.sender,
|
return Response(msg.sender,
|
||||||
"Indicate a term to search",
|
"Indicate a term to search",
|
||||||
msg.channel, nick=msg.nick)
|
msg.channel, nick=msg.nick)
|
||||||
if msg.cmds[0] == "w" or msg.cmds[0] == "wf" or msg.cmds[0] == "wfr":
|
if msg.cmds[0] == "dico":
|
||||||
lang = "fr"
|
lang = "fr"
|
||||||
|
site = "wiktionary.org"
|
||||||
|
section = 1
|
||||||
|
elif msg.cmds[0] == "w" or msg.cmds[0] == "wf" or msg.cmds[0] == "wfr":
|
||||||
|
lang = "fr"
|
||||||
|
site = "wikipedia.org"
|
||||||
|
section = 0
|
||||||
else:
|
else:
|
||||||
lang = "en"
|
lang = "en"
|
||||||
|
site = "wikipedia.org"
|
||||||
|
section = 0
|
||||||
|
|
||||||
s = Wikipedia.Wikipedia(' '.join(msg.cmds[1:]), lang)
|
s = Wikipedia.Wikipedia(' '.join(msg.cmds[1:]), lang, site, section)
|
||||||
|
|
||||||
res = Response(msg.sender, channel=msg.channel, nomore="No more results")
|
res = Response(msg.sender, channel=msg.channel, nomore="No more results")
|
||||||
for result in s.nextRes:
|
for result in s.nextRes:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user