2012-06-30 15:06:34 +00:00
|
|
|
# coding=utf-8
|
|
|
|
|
|
|
|
import http.client
|
|
|
|
import re
|
|
|
|
from urllib.parse import quote
|
|
|
|
|
2012-08-31 00:58:28 +00:00
|
|
|
import xmlparser
|
2012-06-30 15:06:34 +00:00
|
|
|
|
|
|
|
class DDGSearch:
    """Run a DuckDuckGo query and expose fields of the parsed XML reply.

    The query is sent once, at construction time, through ``getPage``; the
    properties then read individual nodes from the parsed document.

    Attributes:
        terms: The search terms exactly as given by the caller.
        ddgres: The object returned by ``xmlparser.parse_string`` for the
            response body, or ``None`` when the request did not answer with
            ``OK`` or ``SEE_OTHER``.
    """

    def __init__(self, terms):
        """Query DuckDuckGo for *terms* and parse the response.

        Args:
            terms: Search terms, forwarded unchanged to ``getPage``.
        """
        self.terms = terms

        (res, page) = getPage(terms)
        if res in (http.client.OK, http.client.SEE_OTHER):
            self.ddgres = xmlparser.parse_string(page)
        else:
            # Any other status leaves the instance without a document; the
            # properties below must cope with ``ddgres`` being ``None``.
            self.ddgres = None

    @property
    def type(self):
        """Content of the ``Type`` node, or ``""`` when it is unavailable."""
        if self.ddgres and self.ddgres.hasNode("Type"):
            return self.ddgres.getFirstNode("Type").getContent()
        return ""

    @property
    def definition(self):
        """Content of the ``Definition`` node, or an apology message.

        The fallback message is also returned when the request failed and
        there is no parsed document at all.
        """
        # Guard against a failed request (ddgres is None); without it this
        # property raised AttributeError instead of returning the fallback.
        if self.ddgres is not None and self.ddgres.hasNode("Definition"):
            return self.ddgres.getFirstNode("Definition").getContent()
        return "Sorry, no definition found for %s" % self.terms

    @property
    def relatedTopics(self):
        """Yield the ``Text`` content of each ``RelatedTopic`` node.

        Best effort: if the document is missing or a lookup fails, iteration
        simply ends (possibly after some items were already yielded).
        """
        try:
            topics = self.ddgres.getFirstNode("RelatedTopics")
            for rt in topics.getNodes("RelatedTopic"):
                yield rt.getFirstNode("Text").getContent()
        except Exception:
            # Deliberately silent, but no longer swallows KeyboardInterrupt
            # or SystemExit as the former bare ``except:`` did.
            return

    @property
    def redirect(self):
        """Content of the ``Redirect`` node, or ``None`` if unavailable."""
        try:
            return self.ddgres.getFirstNode("Redirect").getContent()
        except Exception:
            return None

    @property
    def result(self):
        """First result formatted as ``"<Text>: <FirstURL>"``, or ``None``."""
        try:
            node = self.ddgres.getFirstNode("Results").getFirstNode("Result")
            text = node.getFirstNode("Text").getContent()
            url = node.getFirstNode("FirstURL").getContent()
            return text + ": " + url
        except Exception:
            return None

    @property
    def answer(self):
        """``Answer`` node content passed through ``striphtml``, or ``None``."""
        try:
            return striphtml(self.ddgres.getFirstNode("Answer").getContent())
        except Exception:
            return None

    @property
    def abstract(self):
        """Abstract formatted as ``"<Abstract> <<AbstractURL>>"``, or ``None``."""
        try:
            # NOTE(review): this is the only property using ``getNode``
            # rather than ``getFirstNode``; kept as-is because the xmlparser
            # API is not visible here -- confirm it is intentional.
            summary = self.ddgres.getNode("Abstract").getContent()
            url = self.ddgres.getNode("AbstractURL").getContent()
            return summary + " <" + url + ">"
        except Exception:
            return None
|
2012-06-30 15:06:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
def striphtml(data):
    """Strip HTML tags from *data* and rewrite a few characters.

    Args:
        data: Text that may contain HTML tags.

    Returns:
        The text with every ``<...>`` tag removed (non-greedy match), each
        ``(`` replaced by ``/(``, each ``)`` replaced by ``)/`` and each
        ``&quot;`` entity replaced by a double quote.
    """
    without_tags = re.sub(r'<.*?>', '', data)

    # NOTE(review): the third search string previously read `"""`, which
    # opens a triple-quoted string and made the module unparsable. It is
    # presumably an `&quot;` entity that some tool decoded -- confirm. The
    # two parenthesis literals may likewise have been entities originally;
    # they are kept exactly as found.
    return (without_tags
            .replace("(", "/(")
            .replace(")", ")/")
            .replace("&quot;", "\""))
|
2012-06-30 15:06:34 +00:00
|
|
|
|
|
|
|
def getPage(terms):
    """Fetch the DuckDuckGo XML answer for *terms* over HTTP.

    Args:
        terms: Search terms; they are URL-quoted before being placed in the
            query string.

    Returns:
        A ``(status, body)`` tuple holding the HTTP status code and the
        response body as returned by ``read()``. When sending the request
        fails with ``socket.gaierror``, a message is printed and
        ``(http.client.INTERNAL_SERVER_ERROR, None)`` is returned instead.
    """
    # Imported here because the module never imports ``socket``; the except
    # clause below used to raise NameError instead of handling the failure.
    import socket

    conn = http.client.HTTPConnection("api.duckduckgo.com", timeout=5)
    try:
        try:
            conn.request("GET", "/?q=%s&format=xml" % quote(terms))
        except socket.gaierror:
            # The message previously formatted an undefined name ``p``.
            print("impossible de récupérer la page %s." % (terms,))
            return (http.client.INTERNAL_SERVER_ERROR, None)

        res = conn.getresponse()
        data = res.read()
        return (res.status, data)
    finally:
        # Release the socket on every path, including the error return and
        # any exception raised while reading the response.
        conn.close()
|