New module: ddg: search the Internet through DuckDuckGo and Wolfram|Alpha

This commit is contained in:
Némunaire 2012-06-30 17:06:34 +02:00
parent cf18e7c2e7
commit 16f6cefc59
3 changed files with 187 additions and 0 deletions

68
modules/ddg/DDGSearch.py Normal file
View File

@ -0,0 +1,68 @@
# coding=utf-8
import http.client
import re
from urllib.parse import quote
import module_states_file as xmlparser
class DDGSearch:
    """One DuckDuckGo query plus a cursor over the results it produced.

    The request is sent when the object is built.  ``ddgres`` holds the
    parsed XML document, or None when the HTTP status was not 200; every
    accessor below tolerates that None instead of raising AttributeError.
    """

    def __init__(self, terms):
        """Send the query for *terms* and parse the XML response."""
        self.terms = terms
        # Cursor over the results already handed out; -1 means "none yet".
        self.curRT = -1
        (res, page) = getPage(terms)
        if res == http.client.OK:
            self.ddgres = xmlparser.parse_string(page)
        else:
            self.ddgres = None

    def _related_topics(self):
        """Return the RelatedTopic nodes, or an empty list when absent."""
        if not self.ddgres.hasNode("RelatedTopics"):
            return []
        return self.ddgres.getFirstNode("RelatedTopics").getNodes("RelatedTopic")

    @property
    def type(self):
        """Content of the response's ``Type`` node (None without response)."""
        if self.ddgres is None:
            return None
        return self.ddgres.getFirstNode("Type").getContent()

    @property
    def definition(self):
        """Content of the ``Definition`` node, or an apology string."""
        if self.ddgres is not None and self.ddgres.hasNode("Definition"):
            return self.ddgres.getFirstNode("Definition").getContent()
        return "Sorry, no definition found for %s" % self.terms

    @property
    def nextRes(self):
        """Return the next result text and advance the cursor.

        Each read yields a new result, so this property has a side effect
        on ``curRT``.  Returns "No more results" once nothing is left, or
        when the search produced no usable response.
        """
        if self.ddgres is None:
            return "No more results"

        if self.type == "D":
            # NOTE(review): "D" is presumably the API's disambiguation
            # type; here the related topics are the only results.
            topics = self._related_topics()
            if len(topics) > self.curRT + 1:
                self.curRT += 1
                return topics[self.curRT].getFirstNode("Text").getContent()
        elif self.ddgres.hasNode("Answer"):
            # A direct answer is a single result: served once only.
            if self.curRT < 0:
                self.curRT = 0
                return striphtml(self.ddgres.getFirstNode("Answer").getContent())
        elif self.ddgres.hasNode("Abstract") and len(self.ddgres.getNode("Abstract").getContent()) > 0:
            if self.curRT < 0:
                # First read: the abstract followed by its source URL.
                self.curRT = 0
                return self.ddgres.getNode("Abstract").getContent() + " <" + self.ddgres.getNode("AbstractURL").getContent() + ">"
            # Later reads: walk the related topics starting at index 0.
            topics = self._related_topics()
            if len(topics) > self.curRT:
                node = topics[self.curRT]
                self.curRT += 1
                return node.getFirstNode("Text").getContent()
        return "No more results"
def striphtml(data):
    """Return *data* with every ``<...>`` span removed.

    The match is non-greedy and ``.`` does not cross newlines, so a tag
    that spans several lines is left untouched.
    """
    return re.sub(r'<.*?>', '', data)
def getPage(terms):
    """Query the DuckDuckGo API for *terms* and return ``(status, body)``.

    The body is the raw XML response.  When the host name cannot be
    resolved, a message is printed and
    ``(http.client.INTERNAL_SERVER_ERROR, None)`` is returned.
    """
    # Local import: the import block of this file does not provide
    # ``socket``, so the except clause below used to raise NameError.
    import socket

    conn = http.client.HTTPConnection("api.duckduckgo.com")
    try:
        try:
            conn.request("GET", "/?q=%s&format=xml" % quote(terms))
        except socket.gaierror:
            # The message used to interpolate an undefined name ``p``.
            print ("impossible de récupérer la page %s." % terms)
            return (http.client.INTERNAL_SERVER_ERROR, None)
        res = conn.getresponse()
        data = res.read()
    finally:
        # Release the connection on the failure paths as well.
        conn.close()
    return (res.status, data)

69
modules/ddg/WFASearch.py Normal file
View File

@ -0,0 +1,69 @@
# coding=utf-8
import http.client
import re
import socket
from urllib.parse import quote
import module_states_file as xmlparser
class WFASearch:
    """One Wolfram|Alpha query plus a cursor over its pods and subpods.

    The request is sent when the object is built.  ``wfares`` holds the
    parsed XML document, or None when the HTTP status was not 200; the
    accessors below tolerate that None.
    """

    def __init__(self, terms):
        """Send the query for *terms* and parse the XML response."""
        self.terms = terms
        # Iteration starts at pod index 1.
        # NOTE(review): pod 0 is presumably the input interpretation — confirm.
        self.curPod = 1
        self.curSubPod = 0
        (res, page) = getPage(terms)
        if res == http.client.OK:
            self.wfares = xmlparser.parse_string(page)
        else:
            self.wfares = None

    @property
    def success(self):
        """True when the response's ``success`` attribute is "true"."""
        if self.wfares is None:
            return False
        try:
            return self.wfares["success"] == "true"
        except Exception:
            # Best effort, as before: an unreadable response is a failure.
            return False

    @property
    def error(self):
        """True when the response's ``error`` attribute is "true".

        Returns False when no response was obtained.
        """
        if self.wfares is None:
            return False
        return self.wfares["error"] == "true"

    @property
    def nextRes(self):
        """Return the next non-empty "title: plaintext" line.

        Each read advances ``curPod``/``curSubPod``, so this property has a
        side effect.  Subpods with blank plaintext and pods without any
        subpod are skipped.  Once the pods are exhausted the pod cursor is
        reset to 1 and "No more results" is returned.
        """
        pods = [] if self.wfares is None else self.wfares.getNodes("pod")
        while self.curPod < len(pods):
            pod = pods[self.curPod]
            subpods = pod.getNodes("subpod")
            if self.curSubPod >= len(subpods):
                # Nothing (left) in this pod: move on to the next one.
                self.curPod += 1
                self.curSubPod = 0
                continue
            subpod = subpods[self.curSubPod]
            # Advance the cursor before returning so that the next read
            # resumes after this subpod.
            self.curSubPod += 1
            if self.curSubPod >= len(subpods):
                self.curPod += 1
                self.curSubPod = 0
            text = subpod.getFirstNode("plaintext").getContent().strip()
            if text != "":
                return pod["title"] + ": " + text
        self.curPod = 1
        return "No more results"
def getPage(terms):
    """Query the Wolfram|Alpha v2 API for *terms*; return ``(status, body)``.

    The API key is read from the module-level ``CONF`` object (node
    ``wfaapi``, attribute ``key``).  When the host name cannot be resolved
    or the key lookup fails, a message is printed and
    ``(http.client.INTERNAL_SERVER_ERROR, None)`` is returned.
    """
    conn = http.client.HTTPConnection("api.wolframalpha.com")
    try:
        try:
            conn.request("GET", "/v2/query?input=%s&appid=%s" % (quote(terms), CONF.getNode("wfaapi")["key"]))
        except socket.gaierror:
            print ("impossible de récupérer la page Wolfram|Alpha.")
            return (http.client.INTERNAL_SERVER_ERROR, None)
        except (TypeError, KeyError):
            print ("You need an Wolfram|Alpha API key in order to use this module. Add it to the configuration file:\n<wfaapi key=\"XXXXXX-XXXXXXXXXX\" />\nRegister at http://products.wolframalpha.com/api/")
            return (http.client.INTERNAL_SERVER_ERROR, None)
        res = conn.getresponse()
        data = res.read()
    finally:
        # Release the connection on the early-return paths as well.
        conn.close()
    return (res.status, data)

50
modules/ddg/__init__.py Normal file
View File

@ -0,0 +1,50 @@
# coding=utf-8
# NOTE(review): presumably the nemubot API version this module targets — confirm.
nemubotversion = 3.0
from .DDGSearch import DDGSearch
from . import WFASearch
# Most recent search object per channel (keyed by msg.channel); the
# "more"/"plus" command reads its next result from here.
lastSearch = dict()
def load():
    """Hand this package's ``CONF`` object over to the WFASearch module.

    WFASearch.getPage reads its API key from a module-level ``CONF``.
    ``CONF`` is only read here, so no ``global`` declaration is needed.
    """
    WFASearch.CONF = CONF
def parseanswer(msg):
    """Handle the search commands carried by *msg*.

    ``msg.cmd[0]`` selects the action:

    * "def", "d", "define", "defini", "definit", "definition": send the
      DuckDuckGo definition of the remaining words;
    * "g", "ddg": send the first DuckDuckGo result;
    * "wfa", "calc", "wa": send the first Wolfram|Alpha result;
    * "more", "plus": send the next result of the channel's last search.

    Replies go through ``msg.send_chn``.  Returns True when a search
    command was processed, False otherwise.
    """
    command = msg.cmd[0]

    req = None
    if command in ("def", "d", "define", "defini", "definit", "definition"):
        req = "def"
    # "d" used to be listed here as well, but it could never be reached:
    # the definition aliases above already claim it.
    elif command in ("g", "ddg"):
        req = "link"
    elif command in ("wfa", "calc", "wa"):
        req = "wfa"

    if command in ("more", "plus"):
        previous = lastSearch.get(msg.channel)
        if previous is not None:
            msg.send_chn(previous.nextRes)
        else:
            msg.send_chn("There is no ongoing research.")
        # NOTE(review): this branch falls through to ``return False``, as
        # the original layout appears to do — confirm that is intended.
    elif req is not None:
        if len(msg.cmd) > 1:
            terms = ' '.join(msg.cmd[1:])
            if req == "wfa":
                s = WFASearch.WFASearch(terms)
                if not s.success:
                    msg.send_chn("An error occurs during computation")
                    return True
            else:
                s = DDGSearch(terms)

            if req == "def":
                msg.send_chn(s.definition)
            else:
                msg.send_chn(s.nextRes)
            # Remember the search so "more"/"plus" can continue it.
            lastSearch[msg.channel] = s
        else:
            msg.send_chn("What are you looking for?")
        return True

    return False