# coding=utf-8

"""Use MediaWiki API to get pages"""

import json
import re
import urllib.parse

from nemubot.exception import IRCException
from nemubot.hooks import hook
from nemubot.tools import web

nemubotversion = 3.4

from more import Response


# MEDIAWIKI REQUESTS ##################################################

def get_namespaces(site, ssl=False):
    """Return the namespaces declared by a MediaWiki site.

    Arguments:
    site -- host name of the wiki (inserted as-is into the URL)
    ssl -- use https instead of http when true

    Returns a dict mapping each namespace's "*" value to the full
    namespace description returned by the siteinfo query.
    """

    # Build URL
    url = "http%s://%s/w/api.php?format=json&action=query&meta=siteinfo&siprop=namespaces" % (
        "s" if ssl else "", site)

    # Make the request
    data = web.getJSON(url)

    return {ns["*"]: ns for ns in data["query"]["namespaces"].values()}


def get_raw_page(site, term, ssl=False):
    """Return the raw wikitext of the latest revision of a page.

    Arguments:
    site -- host name of the wiki (inserted as-is into the URL)
    term -- title of the page; it is URL-quoted before being sent
    ssl -- use https instead of http when true

    Raises IRCException("article not found") when the reply does not
    contain any page with revision content.
    """

    # Build URL
    url = "http%s://%s/w/api.php?format=json&redirects&action=query&prop=revisions&rvprop=content&titles=%s" % (
        "s" if ssl else "", site, urllib.parse.quote(term))

    # Make the request
    data = web.getJSON(url)

    try:
        # Only the first page of the reply is considered: a single title
        # is requested, so either it carries the content or it is missing.
        for page in data["query"]["pages"].values():
            return page["revisions"][0]["*"]
    except (AttributeError, IndexError, KeyError, TypeError) as err:
        # Only lookup failures mean "no such article"; anything else
        # (KeyboardInterrupt, ...) must propagate untouched.
        raise IRCException("article not found") from err

    # Empty "pages" mapping: report it instead of silently returning None
    raise IRCException("article not found")


def get_unwikitextified(site, wikitext, ssl=False):
    """Ask the wiki to expand the templates contained in some wikitext.

    Arguments:
    site -- host name of the wiki (inserted as-is into the URL)
    wikitext -- text to expand; it is URL-quoted before being sent
    ssl -- use https instead of http when true

    Returns the "*" field of the "expandtemplates" reply.
    """

    # Build URL
    url = "http%s://%s/w/api.php?format=json&action=expandtemplates&text=%s" % (
        "s" if ssl else "", site, urllib.parse.quote(wikitext))

    # Make the request
    data = web.getJSON(url)

    return data["expandtemplates"]["*"]


## Search

def opensearch(site, term, ssl=False):
    """Yield (text, description, url) tuples from an opensearch query.

    Arguments:
    site -- host name of the wiki (inserted as-is into the URL)
    term -- searched text; it is URL-quoted before being sent
    ssl -- use https instead of http when true

    Nothing is yielded when the request gives no response or when the
    response has no "Section" node.
    """

    # Build URL
    url = "http%s://%s/w/api.php?format=xml&action=opensearch&search=%s" % (
        "s" if ssl else "", site, urllib.parse.quote(term))

    # Make the request
    response = web.getXML(url)

    if response is not None and response.hasNode("Section"):
        for itm in response.getNode("Section").getNodes("Item"):
            yield (itm.getNode("Text").getContent(),
                   itm.getNode("Description").getContent(),
                   itm.getNode("Url").getContent())


# Opening tag MediaWiki puts around matched words in search snippets;
# both quoting styles are accepted.
_SEARCHMATCH_OPEN = re.compile(r"<span class=['\"]searchmatch['\"]>")


def _highlight_snippet(snippet):
    """Turn the search-match markup of a snippet into IRC control codes."""

    # NOTE(review): the two strings being replaced were empty in the
    # reviewed source, which interleaves the control codes between every
    # character.  They are restored here as the highlighting <span> tags
    # documented for MediaWiki search snippets -- confirm against history.
    marked = _SEARCHMATCH_OPEN.sub("\x03\x02", snippet)
    marked = marked.replace("</span>", "\x03\x02")
    return web.striphtml(marked)


def search(site, term, ssl=False):
    """Yield (title snippet, text snippet) tuples from a full-text search.

    Arguments:
    site -- host name of the wiki (inserted as-is into the URL)
    term -- searched text; it is URL-quoted before being sent
    ssl -- use https instead of http when true

    Matched words are surrounded by IRC control codes and the remaining
    markup is removed with web.striphtml.  Nothing is yielded when the
    reply is empty or has no "query"/"search" entry.
    """

    # Build URL
    url = "http%s://%s/w/api.php?format=json&action=query&list=search&srsearch=%s&srprop=titlesnippet|snippet" % (
        "s" if ssl else "", site, urllib.parse.quote(term))

    # Make the request
    data = web.getJSON(url)

    if data is not None and "query" in data and "search" in data["query"]:
        for itm in data["query"]["search"]:
            yield (_highlight_snippet(itm["titlesnippet"]),
                   _highlight_snippet(itm["snippet"]))


# NOTE(review): the definition on the next line is cut off at the end of the reviewed chunk; its text is kept verbatim and untouched.
# PARSING FUNCTIONS ################################################### def strip_model(cnt): # Strip models at begin: mostly useless cnt = re.sub(r"^(({{([^{]|\s|({{([^{]|\s|{{.*?}})*?}})*?)*?}}|\[\[([^[]|\s|\[\[.*?\]\])*?\]\])\s*)+", "", cnt, flags=re.DOTALL) # Remove new line from models for full in re.findall(r"{{.*?}}", cnt, flags=re.DOTALL): cnt = cnt.replace(full, full.replace("\n", " "), 1) # Remove new line after titles cnt, _ = re.subn(r"((?P