From 1f908a6222638b547016f5c21472ae26a76adbd2 Mon Sep 17 00:00:00 2001 From: Richard Lyons Date: Fri, 21 Jun 2024 15:58:12 +0200 Subject: [PATCH 1/2] [fix] engine unit tests. Enables unit tests in the engines directory by adding __init__.py, and fixups for the enabled tests. --- tests/unit/engines/__init__.py | 2 ++ tests/unit/engines/test_command.py | 29 +++++++++--------- tests/unit/engines/test_xpath.py | 47 +++++++++++++++++------------- tests/unit/test_query.py | 1 + 4 files changed, 44 insertions(+), 35 deletions(-) create mode 100644 tests/unit/engines/__init__.py diff --git a/tests/unit/engines/__init__.py b/tests/unit/engines/__init__.py new file mode 100644 index 000000000..9ed59c825 --- /dev/null +++ b/tests/unit/engines/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring diff --git a/tests/unit/engines/test_command.py b/tests/unit/engines/test_command.py index a7d2d2d56..2123ab168 100644 --- a/tests/unit/engines/test_command.py +++ b/tests/unit/engines/test_command.py @@ -21,7 +21,7 @@ from searx.engines import command as command_engine from tests import SearxTestCase -class TestCommandEngine(SearxTestCase): +class TestCommandEngine(SearxTestCase): # pylint: disable=missing-class-docstring def test_basic_seq_command_engine(self): ls_engine = command_engine ls_engine.command = ['seq', '{{QUERY}}'] @@ -33,10 +33,10 @@ class TestCommandEngine(SearxTestCase): {'number': '4', 'template': 'key-value.html'}, {'number': '5', 'template': 'key-value.html'}, ] - results = ls_engine.search('5'.encode('utf-8'), {'pageno': 1}) + results = ls_engine.search('5', {'pageno': 1}) self.assertEqual(results, expected_results) - def test_delimiter_parsing_command_engine(self): + def test_delimiter_parsing(self): searx_logs = '''DEBUG:searx.webapp:static directory is /home/n/p/searx/searx/static DEBUG:searx.webapp:templates directory is /home/n/p/searx/searx/templates DEBUG:searx.engines:soundcloud engine: Starting background initialization @@ -140,10 +140,10 @@ INFO:werkzeug: * Debugger PIN: 299-578-362''' ] for i in [0, 1]: - results = echo_engine.search(''.encode('utf-8'), {'pageno': i + 1}) + results = echo_engine.search('', {'pageno': i + 1}) self.assertEqual(results, expected_results_by_page[i]) - def test_regex_parsing_command_engine(self): + def test_regex_parsing(self): txt = '''commit 35f9a8c81d162a361b826bbcd4a1081a4fbe76a7 Author: Noémi Ványi Date: Tue Oct 15 11:31:33 2019 +0200 @@ -168,11 +168,12 @@ commit ''' git_log_engine.result_separator = '\n\ncommit ' git_log_engine.delimiter = {} git_log_engine.parse_regex = { - 'commit': '\w{40}', - 'author': '[\w* ]* <\w*@?\w*\.?\w*>', - 'date': 'Date: .*', - 'message': '\n\n.*$', + 'commit': r'\w{40}', + 'author': r'[\w* ]* <\w*@?\w*\.?\w*>', + 'date': r'Date: .*', + 'message': r'\n\n.*$', } + git_log_engine.init({"command": git_log_engine.command, "parse_regex": git_log_engine.parse_regex}) expected_results = [ { 'commit': '35f9a8c81d162a361b826bbcd4a1081a4fbe76a7', @@ -197,7 +198,7 @@ commit ''' }, ] - results = git_log_engine.search(''.encode('utf-8'), {'pageno': 1}) + results = git_log_engine.search('', {'pageno': 1}) self.assertEqual(results, expected_results) def test_working_dir_path_query(self): @@ -207,7 +208,7 @@ commit ''' ls_engine.delimiter = {'chars': ' ', 'keys': ['file']} ls_engine.query_type = 'path' - results = ls_engine.search('.'.encode(), {'pageno': 1}) + results = ls_engine.search('.', {'pageno': 1}) self.assertTrue(len(results) != 0) forbidden_paths = [ @@ 
-218,7 +219,7 @@ commit ''' '/var', ] for forbidden_path in forbidden_paths: - self.assertRaises(ValueError, ls_engine.search, '..'.encode(), {'pageno': 1}) + self.assertRaises(ValueError, ls_engine.search, forbidden_path, {'pageno': 1}) def test_enum_queries(self): echo_engine = command_engine @@ -227,7 +228,7 @@ commit ''' echo_engine.query_enum = ['i-am-allowed-to-say-this', 'and-that'] for allowed in echo_engine.query_enum: - results = echo_engine.search(allowed.encode(), {'pageno': 1}) + results = echo_engine.search(allowed, {'pageno': 1}) self.assertTrue(len(results) != 0) forbidden_queries = [ @@ -236,4 +237,4 @@ commit ''' 'prohibited', ] for forbidden in forbidden_queries: - self.assertRaises(ValueError, echo_engine.search, forbidden.encode(), {'pageno': 1}) + self.assertRaises(ValueError, echo_engine.search, forbidden, {'pageno': 1}) diff --git a/tests/unit/engines/test_xpath.py b/tests/unit/engines/test_xpath.py index 24f14127b..380dd1d6c 100644 --- a/tests/unit/engines/test_xpath.py +++ b/tests/unit/engines/test_xpath.py @@ -7,25 +7,43 @@ from searx.engines import xpath from tests import SearxTestCase -class TestXpathEngine(SearxTestCase): +class TestXpathEngine(SearxTestCase): # pylint: disable=missing-class-docstring + html = """ +
+    <div>
+        <div class="search_result">
+            <a class="result" href="https://result1.com">Result 1</a>
+            <p class="content">Content 1</p>
+            <a class="cached" href="https://cachedresult1.com">Cache</a>
+        </div>
+        <div class="search_result">
+            <a class="result" href="https://result2.com">Result 2</a>
+            <p class="content">Content 2</p>
+            <a class="cached" href="https://cachedresult2.com">Cache</a>
+        </div>
+    </div>
+ """ + def test_request(self): xpath.search_url = 'https://url.com/{query}' xpath.categories = [] xpath.paging = False query = 'test_query' dicto = defaultdict(dict) + dicto['language'] = 'all' + dicto['pageno'] = 1 params = xpath.request(query, dicto) self.assertIn('url', params) - self.assertEquals('https://url.com/test_query', params['url']) + self.assertEqual('https://url.com/test_query', params['url']) xpath.search_url = 'https://url.com/q={query}&p={pageno}' xpath.paging = True query = 'test_query' dicto = defaultdict(dict) + dicto['language'] = 'all' dicto['pageno'] = 1 params = xpath.request(query, dicto) self.assertIn('url', params) - self.assertEquals('https://url.com/q=test_query&p=1', params['url']) + self.assertEqual('https://url.com/q=test_query&p=1', params['url']) def test_response(self): # without results_xpath @@ -38,24 +56,10 @@ class TestXpathEngine(SearxTestCase): self.assertRaises(AttributeError, xpath.response, '') self.assertRaises(AttributeError, xpath.response, '[]') - response = mock.Mock(text='') + response = mock.Mock(text='', status_code=200) self.assertEqual(xpath.response(response), []) - html = u""" -
-        <div>
-            <div class="search_result">
-                <a class="result" href="https://result1.com">Result 1</a>
-                <p class="content">Content 1</p>
-                <a class="cached" href="https://cachedresult1.com">Cache</a>
-            </div>
-            <div class="search_result">
-                <a class="result" href="https://result2.com">Result 2</a>
-                <p class="content">Content 2</p>
-                <a class="cached" href="https://cachedresult2.com">Cache</a>
-            </div>
-        </div>
- """ - response = mock.Mock(text=html) + response = mock.Mock(text=self.html, status_code=200) results = xpath.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -80,6 +84,7 @@ class TestXpathEngine(SearxTestCase): results = xpath.response(response) self.assertTrue(results[0]['is_onion']) + def test_response_results_xpath(self): # with results_xpath xpath.results_xpath = '//div[@class="search_result"]' xpath.url_xpath = './/a[@class="result"]/@href' @@ -93,10 +98,10 @@ class TestXpathEngine(SearxTestCase): self.assertRaises(AttributeError, xpath.response, '') self.assertRaises(AttributeError, xpath.response, '[]') - response = mock.Mock(text='') + response = mock.Mock(text='', status_code=200) self.assertEqual(xpath.response(response), []) - response = mock.Mock(text=html) + response = mock.Mock(text=self.html, status_code=200) results = xpath.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index b4f5f8a0d..4c609760e 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -247,6 +247,7 @@ class TestBang(SearxTestCase): # pylint:disable=missing-class-docstring self.assertEqual(query.user_query_parts, TestBang.THE_QUERY.split(' ')) def test_specific(self): + load_engines(TEST_ENGINES) for bang in TestBang.SPECIFIC_BANGS: with self.subTest(msg="Check bang is specific", bang=bang): query_text = TestBang.THE_QUERY + ' ' + bang From 9a9ca307fe53ea8ed9d18a06a1a7da9fa4a1c28f Mon Sep 17 00:00:00 2001 From: Grant Lanham Date: Sun, 9 Jun 2024 14:22:20 -0400 Subject: [PATCH 2/2] [fix] implement tests and remove usage of gen_useragent in engines --- searx/engines/mojeek.py | 3 +- tests/unit/processors/__init__.py | 2 ++ tests/unit/processors/test_online.py | 53 ++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 tests/unit/processors/__init__.py create mode 100644 tests/unit/processors/test_online.py diff --git a/searx/engines/mojeek.py b/searx/engines/mojeek.py index 585a48ff8..6aaca021b 100644 --- a/searx/engines/mojeek.py +++ b/searx/engines/mojeek.py @@ -6,7 +6,7 @@ from urllib.parse import urlencode from lxml import html from dateutil.relativedelta import relativedelta -from searx.utils import eval_xpath, eval_xpath_list, extract_text, gen_useragent +from searx.utils import eval_xpath, eval_xpath_list, extract_text about = { 'website': 'https://mojeek.com', @@ -63,7 +63,6 @@ def request(query, params): logger.debug(args["since"]) params['url'] = f"{base_url}/search?{urlencode(args)}" - params['headers'] = {'User-Agent': gen_useragent()} return params diff --git a/tests/unit/processors/__init__.py b/tests/unit/processors/__init__.py new file mode 100644 index 000000000..9ed59c825 --- /dev/null +++ b/tests/unit/processors/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring diff --git a/tests/unit/processors/test_online.py b/tests/unit/processors/test_online.py new file mode 100644 index 000000000..10e0deb97 --- /dev/null +++ b/tests/unit/processors/test_online.py @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring + +from searx.search import SearchQuery, EngineRef +from searx.search.processors import online +from searx.engines import load_engines +from searx import engines + +from tests import SearxTestCase + +TEST_ENGINE_NAME = 'dummy engine' +TEST_ENGINE = { + 'name': 
TEST_ENGINE_NAME, + 'engine': 'dummy', + 'categories': 'general', + 'shortcut': 'du', + 'timeout': 3.0, + 'tokens': [], +} + + +class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docstring + + def setUp(self): + load_engines([TEST_ENGINE]) + + def tearDown(self): + load_engines([]) + + def _get_params(self, online_processor, search_query, engine_category): + params = online_processor.get_params(search_query, engine_category) + self.assertIsNotNone(params) + assert params is not None + return params + + def test_get_params_default_params(self): + engine = engines.engines[TEST_ENGINE_NAME] + online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME) + search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None) + params = self._get_params(online_processor, search_query, 'general') + self.assertIn('method', params) + self.assertIn('headers', params) + self.assertIn('data', params) + self.assertIn('url', params) + self.assertIn('cookies', params) + self.assertIn('auth', params) + + def test_get_params_useragent(self): + engine = engines.engines[TEST_ENGINE_NAME] + online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME) + search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None) + params = self._get_params(online_processor, search_query, 'general') + self.assertIn('User-Agent', params['headers'])