Parse Wiktionary

This commit is contained in:
Némunaire 2012-11-07 15:37:17 +01:00
parent c3c697bdab
commit b3a9ccf75a
2 changed files with 21 additions and 8 deletions

View File

@ -33,23 +33,24 @@ class Wikipedia:
if c != "":
yield c
# Markup that striplink() deletes outright (it substitutes '' for every match):
# HTML comments, <ref .../> and <ref>...</ref>, <dfn>...</dfn>, {{...}}
# templates (flat, or containing a nested {{...}}), and [[...]] links (with a
# nested [[...]] inside, or with optional |-separated parts).
# NOTE(review): RGXP_p is assigned twice in a row, so the second assignment is
# the one in effect. The two lines look like the before/after sides of a diff
# hunk -- confirm that only one of them belongs in the real file.
RGXP_p = re.compile(r'(<!--.*-->|<ref[^>]*/>|<ref[^>]*>[^>]*</ref>|<dfn[^>]*>[^>]*</dfn>|\{\{[^}]*\}\}|\[\[([^\[\]]*\[\[[^\]\[]*\]\])+[^\[\]]*\]\]|\{\{([^{}]*\{\{.*\}\}[^{}]*)+\}\}|\[\[[^\]|]+(\|[^\]\|]+)*\]\])', re.I)
# Same alternatives as above, plus one more top-level alternative: the literal
# text "#* ''" immediately followed by a newline character.
RGXP_p = re.compile(r"(<!--.*-->|<ref[^>]*/>|<ref[^>]*>[^>]*</ref>|<dfn[^>]*>[^>]*</dfn>|\{\{[^}]*\}\}|\[\[([^\[\]]*\[\[[^\]\[]*\]\])+[^\[\]]*\]\]|\{\{([^{}]*\{\{.*\}\}[^{}]*)+\}\}|\[\[[^\]|]+(\|[^\]\|]+)*\]\])|#\* ''" + "\n", re.I)
# {{nobr|text}} or {{lang|xx|text}}; group 2 captures the trailing text.
RGXP_l = re.compile(r'\{\{(nobr|lang\|[^|}]+)\|([^}]+)\}\}', re.I)
# {{pron|X|...}}; group 1 captures the first argument X.
RGXP_m = re.compile(r'\{\{pron\|([^|}]+)\|[^}]+\}\}', re.I)
# A heading wrapped in two or more '=' on each side (group 1 is the title),
# followed by one or more newlines and then one non-newline character (group 2).
RGXP_t = re.compile("==+ *([^=]+) *=+=\n+([^\n])", re.I)
# [[before|after]] links: group 1 is the part before the pipe, group 2 the
# part after it.
RGXP_q = re.compile(r'\[\[([^\[\]|]+)\|([^\]|]+)]]', re.I)
# [[target]] links without a pipe; group 1 is the text between the brackets.
RGXP_r = re.compile(r'\[\[([^\[\]|]+)\]\]', re.I)
# One or more whitespace characters (not referenced in the lines visible here).
RGXP_s = re.compile(r'\s+')
def striplink(s):
    """Reduce a piece of wiki markup to displayable text.

    The transformations are applied in this order:

    1. The gender templates {{m}}, {{f}} and {{n}} become the words
       "masculin", "feminin" and "neutre".
    2. {{pron|X|...}} becomes [X].
    3. {{nobr|text}} and {{lang|xx|text}} become text.
    4. [[a|b]] becomes a, and [[a]] becomes a.
    5. Everything matched by RGXP_p (remaining templates and links, HTML
       comments, ref and dfn elements) is removed.
    6. Headings matched by RGXP_t are rewritten with control-character
       markers around the title.
    7. ''' and '' quote markers are replaced by control-character sequences.

    Args:
        s: the wiki markup string to clean up.

    Returns:
        The transformed string. An empty string is returned as soon as an
        intermediate step (from step 3 on) leaves nothing behind.
    """
    # str.replace returns a new string and leaves the original untouched, so
    # the result has to be rebound. The previous code discarded it, which
    # meant the gender templates were never translated and were simply
    # deleted later by RGXP_p.
    s = s.replace("{{m}}", "masculin").replace("{{f}}", "feminin").replace("{{n}}", "neutre")
    s = RGXP_m.sub(r"[\1]", s)
    s = RGXP_l.sub(r"\2", s)
    if s == "":
        return s
    s = RGXP_q.sub(r"\1", s)
    if s == "":
        return s
    s = RGXP_r.sub(r"\1", s)
    if s == "":
        return s
    s = RGXP_p.sub('', s)
    if s == "":
        return s
    # Heading title is wrapped in \x03\x16 markers and followed by " :".
    # NOTE(review): these look like IRC formatting control codes -- confirm
    # against whatever client renders the output.
    s = RGXP_t.sub("\x03\x16" + r"\1" + " :\x03\x16 " + r"\2", s)
    # The triple quote must be replaced before the double quote, otherwise
    # the '' replacement would consume part of every '''.
    return s.replace("'''", "\x03\x02").replace("''", "\x03\x1f")

View File

@ -115,6 +115,18 @@ def wiki(msg):
s = Wikipedia.Wikipedia(' '.join(msg.cmds[1:]), lang, site, section)
res = Response(msg.sender, channel=msg.channel, nomore="No more results")
if site == "wiktionary.org":
tout = [result for result in s.nextRes if result.find("\x03\x16 :\x03\x16 ") != 0]
tout.remove(tout[0])
defI=1
for t in tout:
if t.find("# ") == 0:
t = t.replace("# ", "%d. " % defI)
defI += 1
elif t.find("#* ") == 0:
t = t.replace("#* ", " * ")
res.append_message(t)
else:
for result in s.nextRes:
res.append_message(result)