From 823d44ed0a267335fce4d6ae172a4342e7439d7b Mon Sep 17 00:00:00 2001 From: Paul Alcock <25768075+Guilvareux@users.noreply.github.com> Date: Wed, 22 Sep 2021 01:55:09 +0100 Subject: [PATCH 1/2] [mod] engines - add IMDB / Internet Movie Database Merged from @Guilvareux's commit [1] and slightly modified / see [2]. [1] https://github.com/searx/searx/pull/2980/commits/f2f90071 [2] https://github.com/searx/searx/pull/2980 --- AUTHORS.rst | 1 + searx/engines/imdb.py | 95 +++++++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 6 +++ 3 files changed, 102 insertions(+) create mode 100644 searx/engines/imdb.py diff --git a/AUTHORS.rst b/AUTHORS.rst index 0c97790e4..0239346a2 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -162,3 +162,4 @@ features or generally made searx better: - @jhigginbotham - @xenrox - @OliveiraHermogenes +- Paul Alcock @Guilvareux diff --git a/searx/engines/imdb.py b/searx/engines/imdb.py new file mode 100644 index 000000000..88913ddc6 --- /dev/null +++ b/searx/engines/imdb.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint + +"""IMDB - Internet Movie Database + +Retrieves results from a basic search. Advanced search options are not +supported. IMDB's API is undocumented, here are some posts about: + +- https://stackoverflow.com/questions/1966503/does-imdb-provide-an-api +- https://rapidapi.com/blog/how-to-use-imdb-api/ + +An alternative that needs IMDPro_ is `IMDb and Box Office Mojo +`_ + +.. 
_IMDPro: https://pro.imdb.com/login + +""" + +import json + +about = { + "website": 'https://imdb.com/', + "wikidata_id": 'Q37312', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + +categories = ['general', ] +paging = False + +# suggestion_url = "https://sg.media-imdb.com/suggestion/{letter}/{query}.json" +suggestion_url = "https://v2.sg.media-imdb.com/suggestion/{letter}/{query}.json" + +href_base = 'https://imdb.com/{category}/{entry_id}' + +search_categories = { + "nm": "name", + "tt": "title", + "kw": "keyword", + "co": "company", + "ep": "episode" +} + + +def request(query, params): + + query = query.replace(" ", "_").lower() + params['url'] = suggestion_url.format(letter=query[0], query=query) + + return params + + +def response(resp): + + suggestions = json.loads(resp.text) + results = [] + + for entry in suggestions['d']: + + # https://developer.imdb.com/documentation/key-concepts#imdb-ids + entry_id = entry['id'] + categ = search_categories.get(entry_id[:2]) + if categ is None: + logger.error( + 'skip unknown category tag %s in %s', entry_id[:2], entry_id + ) + continue + + title = entry['l'] + if 'q' in entry: + title += " (%s)" % entry['q'] + + content = '' + if 'rank' in entry: + content += "(%s) " % entry['rank'] + if 'y' in entry: + content += str(entry['y']) + " - " + if 's' in entry: + content += entry['s'] + + # imageUrl is the image itself, it is not a thumb! 
+ # image_url = entry['i']['imageUrl'] + + results.append({ + "title": title, + "url": href_base.format(category=categ, entry_id=entry_id), + "content": content, + # "thumbnail" : image_url, + # "template": "videos.html", + + }) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index 207024192..cce70ac53 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -744,6 +744,12 @@ engines: require_api_key: false results: JSON + - name: imdb + engine: imdb + shortcut: imdb + timeout: 6.0 + disabled: true + - name: ina engine: ina shortcut: in From 8a897b86f1d5eb921f25efcf736b67e4485a3065 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 5 Oct 2021 09:00:13 +0200 Subject: [PATCH 2/2] [mod] engines - IMDB: add thumbnails --- searx/engines/imdb.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/searx/engines/imdb.py b/searx/engines/imdb.py index 88913ddc6..a3dab736d 100644 --- a/searx/engines/imdb.py +++ b/searx/engines/imdb.py @@ -81,15 +81,25 @@ def response(resp): content += entry['s'] # imageUrl is the image itself, it is not a thumb! - # image_url = entry['i']['imageUrl'] - + image_url = entry.get('i', {}).get('imageUrl') + if image_url: + # get thumbnail + image_url_name, image_url_prefix = image_url.rsplit('.', 1) + # recipe to get the magic value: + # * search on imdb.com, look at the URL of the thumbnail on the right side of the screen + # * search using the imdb engine, compare the imageUrl and thumbnail URL + # QL75 : JPEG quality (?) + # UX280 : resize to width 280 + # 280,414 : size of the image (add white border) + magic = 'QL75_UX280_CR0,0,280,414_' + if not image_url_name.endswith('_V1_'): + magic = '_V1_' + magic + image_url = image_url_name + magic + '.' 
+ image_url_prefix results.append({ "title": title, "url": href_base.format(category=categ, entry_id=entry_id), "content": content, - # "thumbnail" : image_url, - # "template": "videos.html", - + "img_src" : image_url, }) return results