This commit is contained in:
Allen 2024-11-06 16:14:00 +01:00 committed by GitHub
commit 890c2c1b59
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 85 additions and 57 deletions

View File

@ -12,7 +12,6 @@ from searx import logger
from searx.engines import engines from searx.engines import engines
from searx.metrics import histogram_observe, counter_add, count_error from searx.metrics import histogram_observe, counter_add, count_error
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@ -133,7 +132,7 @@ def result_score(result, priority):
weight = 1.0 weight = 1.0
for result_engine in result['engines']: for result_engine in result['engines']:
if hasattr(engines[result_engine], 'weight'): if hasattr(engines.get(result_engine), 'weight'):
weight *= float(engines[result_engine].weight) weight *= float(engines[result_engine].weight)
weight *= len(result['positions']) weight *= len(result['positions'])
@ -332,10 +331,14 @@ class ResultContainer:
return None return None
def __merge_duplicated_http_result(self, duplicated, result, position): def __merge_duplicated_http_result(self, duplicated, result, position):
# using content with more text # use content with more text
if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')): if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
duplicated['content'] = result['content'] duplicated['content'] = result['content']
# use title with more text
if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
duplicated['title'] = result['title']
# merge all result's parameters not found in duplicate # merge all result's parameters not found in duplicate
for key in result.keys(): for key in result.keys():
if not duplicated.get(key): if not duplicated.get(key):
@ -347,7 +350,7 @@ class ResultContainer:
# add engine to list of result-engines # add engine to list of result-engines
duplicated['engines'].add(result['engine']) duplicated['engines'].add(result['engine'])
# using https if possible # use https if possible
if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https': if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
duplicated['url'] = result['parsed_url'].geturl() duplicated['url'] = result['parsed_url'].geturl()
duplicated['parsed_url'] = result['parsed_url'] duplicated['parsed_url'] = result['parsed_url']

View File

@ -3,7 +3,7 @@
from searx.search import SearchQuery, EngineRef from searx.search import SearchQuery, EngineRef
from searx.search.processors import online from searx.search.processors import online
from searx.engines import load_engines import searx.search
from searx import engines from searx import engines
from tests import SearxTestCase from tests import SearxTestCase
@ -22,10 +22,10 @@ TEST_ENGINE = {
class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docstring class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docstring
def setUp(self): def setUp(self):
load_engines([TEST_ENGINE]) searx.search.initialize([TEST_ENGINE])
def tearDown(self): def tearDown(self):
load_engines([]) searx.search.load_engines([])
def _get_params(self, online_processor, search_query, engine_category): def _get_params(self, online_processor, search_query, engine_category):
params = online_processor.get_params(search_query, engine_category) params = online_processor.get_params(search_query, engine_category)

View File

@ -2,26 +2,11 @@
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
from unittest.mock import MagicMock, Mock from unittest.mock import MagicMock, Mock
from searx.engines import load_engines, mariadb_server from searx.engines import mariadb_server
from tests import SearxTestCase from tests import SearxTestCase
class MariadbServerTests(SearxTestCase): # pylint: disable=missing-class-docstring class MariadbServerTests(SearxTestCase): # pylint: disable=missing-class-docstring
def setUp(self):
load_engines(
[
{
'name': 'mariadb server',
'engine': 'mariadb_server',
'shortcut': 'mdb',
'timeout': 9.0,
'disabled': True,
}
]
)
def tearDown(self):
load_engines([])
def test_init_no_query_str_raises(self): def test_init_no_query_str_raises(self):
self.assertRaises(ValueError, lambda: mariadb_server.init({})) self.assertRaises(ValueError, lambda: mariadb_server.init({}))

View File

@ -1,28 +1,34 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
import logging
from datetime import datetime from datetime import datetime
from unittest.mock import Mock from unittest.mock import Mock
from requests import HTTPError from requests import HTTPError
from parameterized import parameterized from parameterized import parameterized
from searx.engines import load_engines, tineye import searx.search
import searx.engines
from tests import SearxTestCase from tests import SearxTestCase
class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
def setUp(self): def setUp(self):
load_engines([{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}]) searx.search.initialize(
[{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}]
)
self.tineye = searx.engines.engines['tineye']
self.tineye.logger.setLevel(logging.CRITICAL)
def tearDown(self): def tearDown(self):
load_engines([]) searx.search.load_engines([])
def test_status_code_raises(self): def test_status_code_raises(self):
response = Mock() response = Mock()
response.status_code = 401 response.status_code = 401
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
self.assertRaises(HTTPError, lambda: tineye.response(response)) self.assertRaises(HTTPError, lambda: self.tineye.response(response))
@parameterized.expand([(400), (422)]) @parameterized.expand([(400), (422)])
def test_returns_empty_list(self, status_code): def test_returns_empty_list(self, status_code):
@ -30,7 +36,7 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.json.return_value = {} response.json.return_value = {}
response.status_code = status_code response.status_code = status_code
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
results = tineye.response(response) results = self.tineye.response(response)
self.assertEqual(0, len(results)) self.assertEqual(0, len(results))
def test_logs_format_for_422(self): def test_logs_format_for_422(self):
@ -39,9 +45,9 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.status_code = 422 response.status_code = 422
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
with self.assertLogs(tineye.logger) as assert_logs_context: with self.assertLogs(self.tineye.logger) as assert_logs_context:
tineye.response(response) self.tineye.response(response)
self.assertIn(tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output)) self.assertIn(self.tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output))
def test_logs_signature_for_422(self): def test_logs_signature_for_422(self):
response = Mock() response = Mock()
@ -49,9 +55,9 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.status_code = 422 response.status_code = 422
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
with self.assertLogs(tineye.logger) as assert_logs_context: with self.assertLogs(self.tineye.logger) as assert_logs_context:
tineye.response(response) self.tineye.response(response)
self.assertIn(tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output)) self.assertIn(self.tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output))
def test_logs_download_for_422(self): def test_logs_download_for_422(self):
response = Mock() response = Mock()
@ -59,9 +65,9 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.status_code = 422 response.status_code = 422
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
with self.assertLogs(tineye.logger) as assert_logs_context: with self.assertLogs(self.tineye.logger) as assert_logs_context:
tineye.response(response) self.tineye.response(response)
self.assertIn(tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output)) self.assertIn(self.tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output))
def test_logs_description_for_400(self): def test_logs_description_for_400(self):
description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645' description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645'
@ -70,8 +76,8 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
response.status_code = 400 response.status_code = 400
response.raise_for_status.side_effect = HTTPError() response.raise_for_status.side_effect = HTTPError()
with self.assertLogs(tineye.logger) as assert_logs_context: with self.assertLogs(self.tineye.logger) as assert_logs_context:
tineye.response(response) self.tineye.response(response)
self.assertIn(description, ','.join(assert_logs_context.output)) self.assertIn(description, ','.join(assert_logs_context.output))
def test_crawl_date_parses(self): def test_crawl_date_parses(self):
@ -90,5 +96,5 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
] ]
} }
response.status_code = 200 response.status_code = 200
results = tineye.response(response) results = self.tineye.response(response)
self.assertEqual(date, results[0]['publishedDate']) self.assertEqual(date, results[0]['publishedDate'])

View File

@ -2,7 +2,7 @@
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
from parameterized.parameterized import parameterized from parameterized.parameterized import parameterized
from searx.engines import load_engines import searx.search
from searx.query import RawTextQuery from searx.query import RawTextQuery
from tests import SearxTestCase from tests import SearxTestCase
@ -218,10 +218,10 @@ class TestBang(SearxTestCase): # pylint:disable=missing-class-docstring
THE_QUERY = 'the query' THE_QUERY = 'the query'
def setUp(self): def setUp(self):
load_engines(TEST_ENGINES) searx.search.initialize(TEST_ENGINES)
def tearDown(self): def tearDown(self):
load_engines([]) searx.search.load_engines([])
@parameterized.expand(SPECIFIC_BANGS) @parameterized.expand(SPECIFIC_BANGS)
def test_bang(self, bang: str): def test_bang(self, bang: str):

View File

@ -2,9 +2,26 @@
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
from searx.results import ResultContainer from searx.results import ResultContainer
import searx.search
from tests import SearxTestCase from tests import SearxTestCase
def make_test_engine_dict(**kwargs) -> dict:
    """Build an engine settings dict for the tests in this module.

    A fresh dict of default settings is created on every call (``name`` and
    ``engine`` default to ``None``) and every keyword argument is then laid
    over it, so a caller only spells out the fields that differ.

    :param kwargs: settings that override a default or add a new key.
    :return: a new dict with the defaults updated by ``kwargs``.
    """
    test_engine = {
        # fmt: off
        'name': None,
        'engine': None,
        'categories': 'general',
        'shortcut': 'dummy',
        'timeout': 3.0,
        'tokens': [],
        # fmt: on
    }
    # keyword arguments win over the defaults above
    test_engine.update(**kwargs)
    return test_engine
def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs): def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs):
result = { result = {
# fmt: off # fmt: off
@ -19,23 +36,40 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', eng
class ResultContainerTestCase(SearxTestCase): # pylint: disable=missing-class-docstring class ResultContainerTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
def setUp(self) -> None:
    """Initialize search with the stract, duckduckgo and mojeek test engines
    and give each test its own empty ``ResultContainer``."""
    # (engine name, shortcut) pairs; the name doubles as the engine module
    engine_shortcuts = (("stract", "stra"), ("duckduckgo", "ddg"), ("mojeek", "mjk"))
    engine_settings = [
        make_test_engine_dict(name=engine_name, engine=engine_name, shortcut=shortcut)
        for engine_name, shortcut in engine_shortcuts
    ]
    searx.search.initialize(engine_settings)
    self.container = ResultContainer()
def tearDown(self):
    """Call ``searx.search.load_engines`` with an empty engine list after each test."""
    # presumably this drops the engines registered in setUp -- confirm against load_engines
    no_engines = []
    searx.search.load_engines(no_engines)
def test_empty(self): def test_empty(self):
c = ResultContainer() self.assertEqual(self.container.get_ordered_results(), [])
self.assertEqual(c.get_ordered_results(), [])
def test_one_result(self): def test_one_result(self):
c = ResultContainer() self.container.extend('wikipedia', [fake_result()])
c.extend('wikipedia', [fake_result()])
self.assertEqual(c.results_length(), 1) self.assertEqual(self.container.results_length(), 1)
def test_one_suggestion(self): def test_one_suggestion(self):
c = ResultContainer() self.container.extend('wikipedia', [fake_result(suggestion=True)])
c.extend('wikipedia', [fake_result(suggestion=True)])
self.assertEqual(len(c.suggestions), 1) self.assertEqual(len(self.container.suggestions), 1)
self.assertEqual(c.results_length(), 0) self.assertEqual(self.container.results_length(), 0)
def test_result_merge(self): def test_result_merge(self):
c = ResultContainer() self.container.extend('wikipedia', [fake_result()])
c.extend('wikipedia', [fake_result()]) self.container.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
self.assertEqual(c.results_length(), 2) self.assertEqual(self.container.results_length(), 2)
def test_result_merge_by_title(self):
    """Results from three engines with the same URL keep the longest title."""
    self.container.extend('stract', [fake_result(engine='stract', title='short title')])
    self.container.extend('duckduckgo', [fake_result(engine='duckduckgo', title='normal title')])
    self.container.extend('mojeek', [fake_result(engine='mojeek', title='this long long title')])
    # All three use fake_result's default URL, so they have to end up as a
    # single entry; without this check the title assertion below could pass
    # on an unmerged list as well.
    self.assertEqual(self.container.results_length(), 1)
    self.assertEqual(self.container.get_ordered_results()[0].get('title', ''), 'this long long title')