imdb: switch to ugly IMDB HTML parsing
This commit is contained in:
parent
1dae3c713a
commit
694c54a6bc
@ -5,6 +5,8 @@
|
|||||||
import re
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from nemubot.exception import IMException
|
from nemubot.exception import IMException
|
||||||
from nemubot.hooks import hook
|
from nemubot.hooks import hook
|
||||||
from nemubot.tools import web
|
from nemubot.tools import web
|
||||||
@ -14,54 +16,46 @@ from more import Response
|
|||||||
|
|
||||||
# MODULE CORE #########################################################
|
# MODULE CORE #########################################################
|
||||||
|
|
||||||
def get_movie(title=None, year=None, imdbid=None, fullplot=True, tomatoes=False):
|
def get_movie_by_id(imdbid):
|
||||||
"""Returns the information about the matching movie"""
|
"""Returns the information about the matching movie"""
|
||||||
|
|
||||||
# Built URL
|
url = "http://www.imdb.com/title/" + urllib.parse.quote(imdbid)
|
||||||
url = "http://www.omdbapi.com/?"
|
soup = BeautifulSoup(web.getURLContent(url))
|
||||||
if title is not None:
|
|
||||||
url += "t=%s&" % urllib.parse.quote(title)
|
|
||||||
if year is not None:
|
|
||||||
url += "y=%s&" % urllib.parse.quote(year)
|
|
||||||
if imdbid is not None:
|
|
||||||
url += "i=%s&" % urllib.parse.quote(imdbid)
|
|
||||||
if fullplot:
|
|
||||||
url += "plot=full&"
|
|
||||||
if tomatoes:
|
|
||||||
url += "tomatoes=true&"
|
|
||||||
|
|
||||||
# Make the request
|
return {
|
||||||
data = web.getJSON(url)
|
"imdbID": imdbid,
|
||||||
|
"Title": soup.body.find(attrs={"itemprop": "name"}).next_element.strip(),
|
||||||
|
"Year": soup.body.find(id="titleYear").find("a").text.strip() if soup.body.find(id="titleYear") else ", ".join([y.text.strip() for y in soup.body.find(attrs={"class": "seasons-and-year-nav"}).find_all("div")[3].find_all("a")[:-1]]),
|
||||||
|
"Duration": soup.body.find_all(attrs={"itemprop": "duration"})[-1].text.strip(),
|
||||||
|
"imdbRating": soup.body.find(attrs={"itemprop": "ratingValue"}).text.strip(),
|
||||||
|
"imdbVotes": soup.body.find(attrs={"itemprop": "ratingCount"}).text.strip(),
|
||||||
|
"Plot": re.sub(r"\s+", " ", soup.body.find(id="titleStoryLine").find(attrs={"itemprop": "description"}).text).strip(),
|
||||||
|
|
||||||
# Return data
|
"Type": "TV Series" if soup.find(attrs={"class": "np_episode_guide"}) else "Movie",
|
||||||
if "Error" in data:
|
"Country": ", ".join([c.find("a").text.strip() for c in soup.body.find(id="titleDetails").find_all(attrs={"class": "txt-block"}) if c.text.find("Country") != -1]),
|
||||||
raise IMException(data["Error"])
|
"Released": soup.body.find(attrs={"itemprop": "datePublished"}).attrs["content"] if "content" in soup.body.find(attrs={"itemprop": "datePublished"}).attrs else "N\A",
|
||||||
|
"Genre": ", ".join([g.text.strip() for g in soup.body.find_all(attrs={"itemprop": "genre"})[:-1]]),
|
||||||
elif "Response" in data and data["Response"] == "True":
|
"Director": ", ".join([d.find(attrs={"itemprop": "name"}).text.strip() for d in soup.body.find_all(attrs={"itemprop": "director"})]),
|
||||||
return data
|
"Writer": ", ".join([d.find(attrs={"itemprop": "name"}).text.strip() for d in soup.body.find_all(attrs={"itemprop": "creator"})]),
|
||||||
|
"Actors": ", ".join([d.find(attrs={"itemprop": "name"}).text.strip() for d in soup.body.find_all(attrs={"itemprop": "actors"})]),
|
||||||
else:
|
}
|
||||||
raise IMException("An error occurs during movie search")
|
|
||||||
|
|
||||||
|
|
||||||
def find_movies(title):
|
def find_movies(title, year=None):
|
||||||
"""Find existing movies matching a approximate title"""
|
"""Find existing movies matching a approximate title"""
|
||||||
|
|
||||||
|
title = title.lower()
|
||||||
|
|
||||||
# Built URL
|
# Built URL
|
||||||
url = "http://www.omdbapi.com/?s=%s" % urllib.parse.quote(title)
|
url = "https://v2.sg.media-imdb.com/suggests/%s/%s.json" % (urllib.parse.quote(title[0]), urllib.parse.quote(title.replace(" ", "_")))
|
||||||
|
|
||||||
# Make the request
|
# Make the request
|
||||||
data = web.getJSON(url)
|
data = web.getJSON(url, remove_callback=True)
|
||||||
|
|
||||||
# Return data
|
|
||||||
if "Error" in data:
|
|
||||||
raise IMException(data["Error"])
|
|
||||||
|
|
||||||
elif "Search" in data:
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
if year is None:
|
||||||
|
return data["d"]
|
||||||
else:
|
else:
|
||||||
raise IMException("An error occurs during movie search")
|
return [d for d in data["d"] if "y" in d and str(d["y"]) == year]
|
||||||
|
|
||||||
|
|
||||||
# MODULE INTERFACE ####################################################
|
# MODULE INTERFACE ####################################################
|
||||||
@ -79,23 +73,28 @@ def cmd_imdb(msg):
|
|||||||
title = ' '.join(msg.args)
|
title = ' '.join(msg.args)
|
||||||
|
|
||||||
if re.match("^tt[0-9]{7}$", title) is not None:
|
if re.match("^tt[0-9]{7}$", title) is not None:
|
||||||
data = get_movie(imdbid=title)
|
data = get_movie_by_id(imdbid=title)
|
||||||
else:
|
else:
|
||||||
rm = re.match(r"^(.+)\s\(([0-9]{4})\)$", title)
|
rm = re.match(r"^(.+)\s\(([0-9]{4})\)$", title)
|
||||||
if rm is not None:
|
if rm is not None:
|
||||||
data = get_movie(title=rm.group(1), year=rm.group(2))
|
data = find_movies(rm.group(1), year=rm.group(2))
|
||||||
else:
|
else:
|
||||||
data = get_movie(title=title)
|
data = find_movies(title)
|
||||||
|
|
||||||
|
if not data:
|
||||||
|
raise IMException("Movie/series not found")
|
||||||
|
|
||||||
|
data = get_movie_by_id(data[0]["id"])
|
||||||
|
|
||||||
res = Response(channel=msg.channel,
|
res = Response(channel=msg.channel,
|
||||||
title="%s (%s)" % (data['Title'], data['Year']),
|
title="%s (%s)" % (data['Title'], data['Year']),
|
||||||
nomore="No more information, more at http://www.imdb.com/title/%s" % data['imdbID'])
|
nomore="No more information, more at http://www.imdb.com/title/%s" % data['imdbID'])
|
||||||
|
|
||||||
res.append_message("\x02rating\x0F: %s (%s votes); \x02plot\x0F: %s" %
|
res.append_message("%s \x02genre:\x0F %s; \x02rating\x0F: %s (%s votes); \x02plot\x0F: %s" %
|
||||||
(data['imdbRating'], data['imdbVotes'], data['Plot']))
|
(data['Type'], data['Genre'], data['imdbRating'], data['imdbVotes'], data['Plot']))
|
||||||
|
|
||||||
res.append_message("%s \x02from\x0F %s \x02released on\x0F %s; \x02genre:\x0F %s; \x02directed by:\x0F %s; \x02written by:\x0F %s; \x02main actors:\x0F %s"
|
res.append_message("%s \x02from\x0F %s \x02released on\x0F %s; \x02directed by:\x0F %s; \x02written by:\x0F %s; \x02main actors:\x0F %s"
|
||||||
% (data['Type'], data['Country'], data['Released'], data['Genre'], data['Director'], data['Writer'], data['Actors']))
|
% (data['Type'], data['Country'], data['Released'], data['Director'], data['Writer'], data['Actors']))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
@ -111,7 +110,7 @@ def cmd_search(msg):
|
|||||||
data = find_movies(' '.join(msg.args))
|
data = find_movies(' '.join(msg.args))
|
||||||
|
|
||||||
movies = list()
|
movies = list()
|
||||||
for m in data['Search']:
|
for m in data:
|
||||||
movies.append("\x02%s\x0F (%s of %s)" % (m['Title'], m['Type'], m['Year']))
|
movies.append("\x02%s\x0F%s with %s" % (m['l'], (" (" + str(m['y']) + ")") if "y" in m else "", m['s']))
|
||||||
|
|
||||||
return Response(movies, title="Titles found", channel=msg.channel)
|
return Response(movies, title="Titles found", channel=msg.channel)
|
||||||
|
Loading…
Reference in New Issue
Block a user