Compare commits

...

3 Commits

Author SHA1 Message Date
Allen 3e69a68dcd
Merge 21dd524a12 into dfaf5868e2 2024-11-13 01:01:47 +01:00
Markus Heiser 21dd524a12 [fix] unit tests: call searx.search.initialize in test's setUp
Depending on the order in which the unit tests are executed, the searx.search
module is initialized or not; issue reported in [1]::

    Traceback (most recent call last):
      File "searxng/tests/unit/test_results.py", line 72, in test_result_merge_by_title
        self.container.extend('stract', [fake_result(engine='stract', title='short title')])
      File "searxng/searx/results.py", line 243, in extend
        histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count')
      File "searxng/searx/metrics/__init__.py", line 49, in histogram_observe
        histogram_storage.get(*args).observe(duration)
        ^^^^^^^^^^^^^^^^^^^^^
      AttributeError: 'NoneType' object has no attribute 'get'

To ensure that the searx.search module is initialized, the

- searx.engines.load_engines is replaced by
- searx.search.initialize

[1] https://github.com/searxng/searxng/pull/3932#discussion_r1822406569

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-10-30 14:33:52 +01:00
Allen 0476de443e [enh] use longest title and test get_ordered_results() 2024-10-30 14:33:52 +01:00
6 changed files with 85 additions and 57 deletions

View File

@ -12,7 +12,6 @@ from searx import logger
from searx.engines import engines from searx.engines import engines
from searx.metrics import histogram_observe, counter_add, count_error from searx.metrics import histogram_observe, counter_add, count_error
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@ -133,7 +132,7 @@ def result_score(result, priority):
weight = 1.0 weight = 1.0
for result_engine in result['engines']: for result_engine in result['engines']:
if hasattr(engines[result_engine], 'weight'): if hasattr(engines.get(result_engine), 'weight'):
weight *= float(engines[result_engine].weight) weight *= float(engines[result_engine].weight)
weight *= len(result['positions']) weight *= len(result['positions'])
@ -332,10 +331,14 @@ class ResultContainer:
return None return None
def __merge_duplicated_http_result(self, duplicated, result, position): def __merge_duplicated_http_result(self, duplicated, result, position):
# using content with more text # use content with more text
if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')): if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
duplicated['content'] = result['content'] duplicated['content'] = result['content']
# use title with more text
if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
duplicated['title'] = result['title']
# merge all result's parameters not found in duplicate # merge all result's parameters not found in duplicate
for key in result.keys(): for key in result.keys():
if not duplicated.get(key): if not duplicated.get(key):
@ -347,7 +350,7 @@ class ResultContainer:
# add engine to list of result-engines # add engine to list of result-engines
duplicated['engines'].add(result['engine']) duplicated['engines'].add(result['engine'])
# using https if possible # use https if possible
if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https': if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
duplicated['url'] = result['parsed_url'].geturl() duplicated['url'] = result['parsed_url'].geturl()
duplicated['parsed_url'] = result['parsed_url'] duplicated['parsed_url'] = result['parsed_url']

View File

@ -3,7 +3,7 @@
from searx.search import SearchQuery, EngineRef from searx.search import SearchQuery, EngineRef
from searx.search.processors import online from searx.search.processors import online
from searx.engines import load_engines import searx.search
from searx import engines from searx import engines
from tests import SearxTestCase from tests import SearxTestCase
@ -22,10 +22,10 @@ TEST_ENGINE = {
class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docstring class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docstring
def setUp(self): def setUp(self):
load_engines([TEST_ENGINE]) searx.search.initialize([TEST_ENGINE])
def tearDown(self): def tearDown(self):
load_engines([]) searx.search.load_engines([])
def _get_params(self, online_processor, search_query, engine_category): def _get_params(self, online_processor, search_query, engine_category):
params = online_processor.get_params(search_query, engine_category) params = online_processor.get_params(search_query, engine_category)

View File

@ -2,26 +2,11 @@
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
from unittest.mock import MagicMock, Mock from unittest.mock import MagicMock, Mock
from searx.engines import load_engines, mariadb_server from searx.engines import mariadb_server
from tests import SearxTestCase from tests import SearxTestCase
class MariadbServerTests(SearxTestCase): # pylint: disable=missing-class-docstring class MariadbServerTests(SearxTestCase): # pylint: disable=missing-class-docstring
def setUp(self):
load_engines(
[
{
'name': 'mariadb server',
'engine': 'mariadb_server',
'shortcut': 'mdb',
'timeout': 9.0,
'disabled': True,
}
]
)
def tearDown(self):
load_engines([])
def test_init_no_query_str_raises(self): def test_init_no_query_str_raises(self):
self.assertRaises(ValueError, lambda: mariadb_server.init({})) self.assertRaises(ValueError, lambda: mariadb_server.init({}))

View File

@ -1,28 +1,34 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
import logging
from datetime import datetime from datetime import datetime
from unittest.mock import Mock from unittest.mock import Mock
from requests import HTTPError from requests import HTTPError
from parameterized import parameterized from parameterized import parameterized
from searx.engines import load_engines, tineye import searx.search
import searx.engines
from tests import SearxTestCase from tests import SearxTestCase
class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
def setUp(self): def setUp(self):
load_engines([{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}]) searx.search.initialize(
[{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}]
)
self.tineye = searx.engines.engines['tineye']
self.tineye.logger.setLevel(logging.CRITICAL)
def tearDown(self): def tearDown(self):
load_engines([]) searx.search.load_engines([])
def test_status_code_raises(self): def test_status_code_raises(self):
response = Mock() response = Mock()
response.status_code = 401 response.status_code = 401
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
self.assertRaises(HTTPError, lambda: tineye.response(response)) self.assertRaises(HTTPError, lambda: self.tineye.response(response))
@parameterized.expand([(400), (422)]) @parameterized.expand([(400), (422)])
def test_returns_empty_list(self, status_code): def test_returns_empty_list(self, status_code):
@ -30,7 +36,7 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.json.return_value = {} response.json.return_value = {}
response.status_code = status_code response.status_code = status_code
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
results = tineye.response(response) results = self.tineye.response(response)
self.assertEqual(0, len(results)) self.assertEqual(0, len(results))
def test_logs_format_for_422(self): def test_logs_format_for_422(self):
@ -39,9 +45,9 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.status_code = 422 response.status_code = 422
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
with self.assertLogs(tineye.logger) as assert_logs_context: with self.assertLogs(self.tineye.logger) as assert_logs_context:
tineye.response(response) self.tineye.response(response)
self.assertIn(tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output)) self.assertIn(self.tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output))
def test_logs_signature_for_422(self): def test_logs_signature_for_422(self):
response = Mock() response = Mock()
@ -49,9 +55,9 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.status_code = 422 response.status_code = 422
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
with self.assertLogs(tineye.logger) as assert_logs_context: with self.assertLogs(self.tineye.logger) as assert_logs_context:
tineye.response(response) self.tineye.response(response)
self.assertIn(tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output)) self.assertIn(self.tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output))
def test_logs_download_for_422(self): def test_logs_download_for_422(self):
response = Mock() response = Mock()
@ -59,9 +65,9 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.status_code = 422 response.status_code = 422
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
with self.assertLogs(tineye.logger) as assert_logs_context: with self.assertLogs(self.tineye.logger) as assert_logs_context:
tineye.response(response) self.tineye.response(response)
self.assertIn(tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output)) self.assertIn(self.tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output))
def test_logs_description_for_400(self): def test_logs_description_for_400(self):
description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645' description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645'
@ -70,8 +76,8 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.status_code = 400 response.status_code = 400
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
with self.assertLogs(tineye.logger) as assert_logs_context: with self.assertLogs(self.tineye.logger) as assert_logs_context:
tineye.response(response) self.tineye.response(response)
self.assertIn(description, ','.join(assert_logs_context.output)) self.assertIn(description, ','.join(assert_logs_context.output))
def test_crawl_date_parses(self): def test_crawl_date_parses(self):
@ -90,5 +96,5 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
] ]
} }
response.status_code = 200 response.status_code = 200
results = tineye.response(response) results = self.tineye.response(response)
self.assertEqual(date, results[0]['publishedDate']) self.assertEqual(date, results[0]['publishedDate'])

View File

@ -2,7 +2,7 @@
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
from parameterized.parameterized import parameterized from parameterized.parameterized import parameterized
from searx.engines import load_engines import searx.search
from searx.query import RawTextQuery from searx.query import RawTextQuery
from tests import SearxTestCase from tests import SearxTestCase
@ -218,10 +218,10 @@ class TestBang(SearxTestCase): # pylint:disable=missing-class-docstring
THE_QUERY = 'the query' THE_QUERY = 'the query'
def setUp(self): def setUp(self):
load_engines(TEST_ENGINES) searx.search.initialize(TEST_ENGINES)
def tearDown(self): def tearDown(self):
load_engines([]) searx.search.load_engines([])
@parameterized.expand(SPECIFIC_BANGS) @parameterized.expand(SPECIFIC_BANGS)
def test_bang(self, bang: str): def test_bang(self, bang: str):

View File

@ -2,9 +2,26 @@
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
from searx.results import ResultContainer from searx.results import ResultContainer
import searx.search
from tests import SearxTestCase from tests import SearxTestCase
def make_test_engine_dict(**kwargs) -> dict:
test_engine = {
# fmt: off
'name': None,
'engine': None,
'categories': 'general',
'shortcut': 'dummy',
'timeout': 3.0,
'tokens': [],
# fmt: on
}
test_engine.update(**kwargs)
return test_engine
def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs): def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs):
result = { result = {
# fmt: off # fmt: off
@ -19,23 +36,40 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', eng
class ResultContainerTestCase(SearxTestCase): # pylint: disable=missing-class-docstring class ResultContainerTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
def setUp(self) -> None:
stract_engine = make_test_engine_dict(name="stract", engine="stract", shortcut="stra")
duckduckgo_engine = make_test_engine_dict(name="duckduckgo", engine="duckduckgo", shortcut="ddg")
mojeek_engine = make_test_engine_dict(name="mojeek", engine="mojeek", shortcut="mjk")
searx.search.initialize([stract_engine, duckduckgo_engine, mojeek_engine])
self.container = ResultContainer()
def tearDown(self):
searx.search.load_engines([])
def test_empty(self): def test_empty(self):
c = ResultContainer() self.assertEqual(self.container.get_ordered_results(), [])
self.assertEqual(c.get_ordered_results(), [])
def test_one_result(self): def test_one_result(self):
c = ResultContainer() self.container.extend('wikipedia', [fake_result()])
c.extend('wikipedia', [fake_result()])
self.assertEqual(c.results_length(), 1) self.assertEqual(self.container.results_length(), 1)
def test_one_suggestion(self): def test_one_suggestion(self):
c = ResultContainer() self.container.extend('wikipedia', [fake_result(suggestion=True)])
c.extend('wikipedia', [fake_result(suggestion=True)])
self.assertEqual(len(c.suggestions), 1) self.assertEqual(len(self.container.suggestions), 1)
self.assertEqual(c.results_length(), 0) self.assertEqual(self.container.results_length(), 0)
def test_result_merge(self): def test_result_merge(self):
c = ResultContainer() self.container.extend('wikipedia', [fake_result()])
c.extend('wikipedia', [fake_result()]) self.container.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
self.assertEqual(c.results_length(), 2) self.assertEqual(self.container.results_length(), 2)
def test_result_merge_by_title(self):
self.container.extend('stract', [fake_result(engine='stract', title='short title')])
self.container.extend('duckduckgo', [fake_result(engine='duckduckgo', title='normal title')])
self.container.extend('mojeek', [fake_result(engine='mojeek', title='this long long title')])
self.assertEqual(self.container.get_ordered_results()[0].get('title', ''), 'this long long title')