Parse Wiktionary
This commit is contained in:
parent
c3c697bdab
commit
b3a9ccf75a
@ -33,23 +33,24 @@ class Wikipedia:
|
|||||||
if c != "":
|
if c != "":
|
||||||
yield c
|
yield c
|
||||||
|
|
||||||
# Markup that is deleted outright: HTML comments, <ref> and <dfn> elements,
# {{...}} templates (flat or nested), [[...]] links (nested or with several
# "|" parts), and the literal "#* ''" marker followed by a newline.
RGXP_p = re.compile(r"(<!--.*-->|<ref[^>]*/>|<ref[^>]*>[^>]*</ref>|<dfn[^>]*>[^>]*</dfn>|\{\{[^}]*\}\}|\[\[([^\[\]]*\[\[[^\]\[]*\]\])+[^\[\]]*\]\]|\{\{([^{}]*\{\{.*\}\}[^{}]*)+\}\}|\[\[[^\]|]+(\|[^\]\|]+)*\]\])|#\* ''" + "\n", re.I)
# {{nobr|text}} / {{lang|xx|text}}: group 2 is the text to keep.
RGXP_l = re.compile(r'\{\{(nobr|lang\|[^|}]+)\|([^}]+)\}\}', re.I)
# {{pron|value|...}}: group 1 is the first argument of the template.
RGXP_m = re.compile(r'\{\{pron\|([^|}]+)\|[^}]+\}\}', re.I)
# "== Title ==" heading followed by newline(s): group 1 is the title,
# group 2 the first character of the following text.
RGXP_t = re.compile("==+ *([^=]+) *=+=\n+([^\n])", re.I)
# [[target|label]]: group 1 is the target, group 2 the label.
RGXP_q = re.compile(r'\[\[([^\[\]|]+)\|([^\]|]+)]]', re.I)
# [[target]]: group 1 is the target.
RGXP_r = re.compile(r'\[\[([^\[\]|]+)\]\]', re.I)
# Any run of whitespace.
RGXP_s = re.compile(r'\s+')


def striplink(s):
    """Return *s* with wiki markup reduced to plain text.

    Substitutions are applied in a fixed order, because later patterns
    (notably ``RGXP_p``) delete templates and links that earlier steps must
    see first:

    1. ``{{m}}``/``{{f}}``/``{{n}}`` become ``masculin``/``feminin``/``neutre``.
    2. ``{{pron|x|...}}`` becomes ``[x]``.
    3. ``{{nobr|text}}`` and ``{{lang|xx|text}}`` become ``text``.
    4. ``[[target|label]]`` and ``[[target]]`` become ``target``.
    5. Everything matched by ``RGXP_p`` is removed.
    6. Headings matched by ``RGXP_t`` and ``'''``/``''`` quote markup are
       replaced by control-character sequences (presumably IRC formatting
       codes -- confirm against the code that renders the response).

    Returns the empty string as soon as step 5 leaves nothing.
    """
    # str.replace returns a new string; the result must be rebound, otherwise
    # the gender templates survive until RGXP_p deletes them.
    s = (s.replace("{{m}}", "masculin")
          .replace("{{f}}", "feminin")
          .replace("{{n}}", "neutre"))

    s = RGXP_m.sub(r"[\1]", s)
    s = RGXP_l.sub(r"\2", s)
    s = RGXP_q.sub(r"\1", s)
    s = RGXP_r.sub(r"\1", s)

    s = RGXP_p.sub('', s)
    if s == "":
        return s

    s = RGXP_t.sub("\x03\x16" + r"\1" + " :\x03\x16 " + r"\2", s)
    # Replace ''' before '' so the three-quote marker is not split.
    return s.replace("'''", "\x03\x02").replace("''", "\x03\x1f")
|
||||||
|
@ -115,6 +115,18 @@ def wiki(msg):
|
|||||||
s = Wikipedia.Wikipedia(' '.join(msg.cmds[1:]), lang, site, section)
|
s = Wikipedia.Wikipedia(' '.join(msg.cmds[1:]), lang, site, section)
|
||||||
|
|
||||||
res = Response(msg.sender, channel=msg.channel, nomore="No more results")
|
res = Response(msg.sender, channel=msg.channel, nomore="No more results")
|
||||||
|
if site == "wiktionary.org":
|
||||||
|
tout = [result for result in s.nextRes if result.find("\x03\x16 :\x03\x16 ") != 0]
|
||||||
|
tout.remove(tout[0])
|
||||||
|
defI=1
|
||||||
|
for t in tout:
|
||||||
|
if t.find("# ") == 0:
|
||||||
|
t = t.replace("# ", "%d. " % defI)
|
||||||
|
defI += 1
|
||||||
|
elif t.find("#* ") == 0:
|
||||||
|
t = t.replace("#* ", " * ")
|
||||||
|
res.append_message(t)
|
||||||
|
else:
|
||||||
for result in s.nextRes:
|
for result in s.nextRes:
|
||||||
res.append_message(result)
|
res.append_message(result)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user