diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py index 196c89a2b..c35799c69 100644 --- a/searx/engines/tineye.py +++ b/searx/engines/tineye.py @@ -14,10 +14,16 @@ billion images `[tineye.com] `_. """ +from typing import TYPE_CHECKING from urllib.parse import urlencode from datetime import datetime from flask_babel import gettext +if TYPE_CHECKING: + import logging + + logger = logging.getLogger() + about = { "website": 'https://tineye.com', "wikidata_id": 'Q2382535', @@ -34,7 +40,7 @@ categories = ['general'] paging = True safesearch = False base_url = 'https://tineye.com' -search_string = '/result_json/?page={page}&{query}' +search_string = '/api/v1/result_json/?page={page}&{query}' FORMAT_NOT_SUPPORTED = gettext( "Could not read that image url. This may be due to an unsupported file" @@ -120,7 +126,7 @@ def parse_tineye_match(match_json): crawl_date = backlink_json.get("crawl_date") if crawl_date: - crawl_date = datetime.fromisoformat(crawl_date[:-3]) + crawl_date = datetime.strptime(crawl_date, '%Y-%m-%d') else: crawl_date = datetime.min @@ -150,29 +156,15 @@ def parse_tineye_match(match_json): def response(resp): """Parse HTTP response from TinEye.""" - results = [] - try: + # handle the 422 client side errors, and the possible 400 status code error + if resp.status_code in (400, 422): json_data = resp.json() - except Exception as exc: # pylint: disable=broad-except - msg = "can't parse JSON response // %s" % exc - logger.error(msg) - json_data = {'error': msg} - - # handle error codes from Tineye - - if resp.is_error: - if resp.status_code in (400, 422): - - message = 'HTTP status: %s' % resp.status_code - error = json_data.get('error') - s_key = json_data.get('suggestions', {}).get('key', '') - - if error and s_key: - message = "%s (%s)" % (error, s_key) - elif error: - message = error + suggestions = json_data.get('suggestions', {}) + message = f'HTTP Status Code: {resp.status_code}' + if resp.status_code == 422: + s_key = suggestions.get('key', '') if s_key == "Invalid image URL": # test https://docs.searxng.org/_static/searxng-wordmark.svg message = FORMAT_NOT_SUPPORTED @@ -182,16 +174,23 @@ def response(resp): elif s_key == 'Download Error': # test https://notexists message = DOWNLOAD_ERROR + else: + logger.warning("Unknown suggestion key encountered: %s", s_key) + else: # 400 + description = suggestions.get('description') + if isinstance(description, list): + message = ','.join(description) - # see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023 - # results.append({'answer': message}) - logger.error(message) + # see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023 + # results.append({'answer': message}) + logger.error(message) + return [] - return results + # Raise for all other responses + resp.raise_for_status() - resp.raise_for_status() - - # append results from matches + results = [] + json_data = resp.json() for match_json in json_data['matches']: diff --git a/tests/unit/test_tineye.py b/tests/unit/test_tineye.py new file mode 100644 index 000000000..0530b4c5e --- /dev/null +++ b/tests/unit/test_tineye.py @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring + + +from datetime import datetime +from unittest.mock import Mock +from requests import HTTPError +from searx.engines import load_engines, tineye +from tests import SearxTestCase + + +class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring + + def setUp(self): + load_engines([{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}]) + + def tearDown(self): + load_engines([]) + + def test_status_code_raises(self): + response = Mock() + response.status_code = 401 + response.raise_for_status.side_effect = HTTPError() + self.assertRaises(HTTPError, lambda: tineye.response(response)) + + def test_returns_empty_list_for_422(self): + response = Mock() + response.json.return_value = {} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + with self.assertLogs(tineye.logger) as _dev_null: + results = tineye.response(response) + self.assertEqual(0, len(results)) + + def test_logs_format_for_422(self): + response = Mock() + response.json.return_value = {"suggestions": {"key": "Invalid image URL"}} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output)) + + def test_logs_signature_for_422(self): + response = Mock() + response.json.return_value = {"suggestions": {"key": "NO_SIGNATURE_ERROR"}} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output)) + + def test_logs_download_for_422(self): + response = Mock() + response.json.return_value = {"suggestions": {"key": "Download Error"}} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output)) + + def test_empty_list_for_400(self): + response = Mock() + response.json.return_value = {} + response.status_code = 400 + response.raise_for_status.side_effect = HTTPError() + with self.assertLogs(tineye.logger) as _dev_null: + results = tineye.response(response) + self.assertEqual(0, len(results)) + + def test_logs_description_for_400(self): + description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645' + response = Mock() + response.json.return_value = {"suggestions": {"description": [description], "title": "Oops! We're sorry!"}} + response.status_code = 400 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(description, ','.join(assert_logs_context.output)) + + def test_crawl_date_parses(self): + date_str = '2020-05-25' + date = datetime.strptime(date_str, '%Y-%m-%d') + response = Mock() + response.json.return_value = { + 'matches': [ + { + 'backlinks': [ + { + 'crawl_date': date_str, + } + ] + } + ] + } + response.status_code = 200 + results = tineye.response(response) + self.assertEqual(date, results[0]['publishedDate'])