mirror of https://github.com/searxng/searxng.git
Compare commits
4 Commits
3e69a68dcd
...
567ad94310
Author | SHA1 | Date |
---|---|---|
Allen | 567ad94310 | |
Nicolas Dato | abd9b271bc | |
Markus Heiser | 21dd524a12 | |
Allen | 0476de443e | |
|
@@ -6,7 +6,7 @@ DuckDuckGo Lite
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
import re
|
import re
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode, quote_plus
|
||||||
import json
|
import json
|
||||||
import babel
|
import babel
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
@@ -245,10 +245,12 @@ def request(query, params):
|
||||||
|
|
||||||
# Advanced search syntax ends in CAPTCHA
|
# Advanced search syntax ends in CAPTCHA
|
||||||
# https://duckduckgo.com/duckduckgo-help-pages/results/syntax/
|
# https://duckduckgo.com/duckduckgo-help-pages/results/syntax/
|
||||||
query = [
|
query = " ".join(
|
||||||
|
[
|
||||||
x.removeprefix("site:").removeprefix("intitle:").removeprefix("inurl:").removeprefix("filetype:")
|
x.removeprefix("site:").removeprefix("intitle:").removeprefix("inurl:").removeprefix("filetype:")
|
||||||
for x in query.split()
|
for x in query.split()
|
||||||
]
|
]
|
||||||
|
)
|
||||||
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
|
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
|
||||||
if eng_region == "wt-wt":
|
if eng_region == "wt-wt":
|
||||||
# https://html.duckduckgo.com/html sets an empty value for "all".
|
# https://html.duckduckgo.com/html sets an empty value for "all".
|
||||||
|
@@ -261,7 +263,7 @@ def request(query, params):
|
||||||
|
|
||||||
params['url'] = url
|
params['url'] = url
|
||||||
params['method'] = 'POST'
|
params['method'] = 'POST'
|
||||||
params['data']['q'] = query
|
params['data']['q'] = quote_plus(query)
|
||||||
|
|
||||||
# The API is not documented, so we do some reverse engineering and emulate
|
# The API is not documented, so we do some reverse engineering and emulate
|
||||||
# what https://html.duckduckgo.com/html does when you press "next Page" link
|
# what https://html.duckduckgo.com/html does when you press "next Page" link
|
||||||
|
|
|
@@ -12,7 +12,6 @@ from searx import logger
|
||||||
from searx.engines import engines
|
from searx.engines import engines
|
||||||
from searx.metrics import histogram_observe, counter_add, count_error
|
from searx.metrics import histogram_observe, counter_add, count_error
|
||||||
|
|
||||||
|
|
||||||
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
|
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
|
||||||
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
||||||
|
|
||||||
|
@@ -133,7 +132,7 @@ def result_score(result, priority):
|
||||||
weight = 1.0
|
weight = 1.0
|
||||||
|
|
||||||
for result_engine in result['engines']:
|
for result_engine in result['engines']:
|
||||||
if hasattr(engines[result_engine], 'weight'):
|
if hasattr(engines.get(result_engine), 'weight'):
|
||||||
weight *= float(engines[result_engine].weight)
|
weight *= float(engines[result_engine].weight)
|
||||||
|
|
||||||
weight *= len(result['positions'])
|
weight *= len(result['positions'])
|
||||||
|
@@ -332,10 +331,14 @@ class ResultContainer:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def __merge_duplicated_http_result(self, duplicated, result, position):
|
def __merge_duplicated_http_result(self, duplicated, result, position):
|
||||||
# using content with more text
|
# use content with more text
|
||||||
if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
|
if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
|
||||||
duplicated['content'] = result['content']
|
duplicated['content'] = result['content']
|
||||||
|
|
||||||
|
# use title with more text
|
||||||
|
if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
|
||||||
|
duplicated['title'] = result['title']
|
||||||
|
|
||||||
# merge all result's parameters not found in duplicate
|
# merge all result's parameters not found in duplicate
|
||||||
for key in result.keys():
|
for key in result.keys():
|
||||||
if not duplicated.get(key):
|
if not duplicated.get(key):
|
||||||
|
@@ -347,7 +350,7 @@ class ResultContainer:
|
||||||
# add engine to list of result-engines
|
# add engine to list of result-engines
|
||||||
duplicated['engines'].add(result['engine'])
|
duplicated['engines'].add(result['engine'])
|
||||||
|
|
||||||
# using https if possible
|
# use https if possible
|
||||||
if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
|
if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
|
||||||
duplicated['url'] = result['parsed_url'].geturl()
|
duplicated['url'] = result['parsed_url'].geturl()
|
||||||
duplicated['parsed_url'] = result['parsed_url']
|
duplicated['parsed_url'] = result['parsed_url']
|
||||||
|
|
|
@@ -3,7 +3,7 @@
|
||||||
|
|
||||||
from searx.search import SearchQuery, EngineRef
|
from searx.search import SearchQuery, EngineRef
|
||||||
from searx.search.processors import online
|
from searx.search.processors import online
|
||||||
from searx.engines import load_engines
|
import searx.search
|
||||||
from searx import engines
|
from searx import engines
|
||||||
|
|
||||||
from tests import SearxTestCase
|
from tests import SearxTestCase
|
||||||
|
@@ -22,10 +22,10 @@ TEST_ENGINE = {
|
||||||
class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docstring
|
class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
load_engines([TEST_ENGINE])
|
searx.search.initialize([TEST_ENGINE])
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
load_engines([])
|
searx.search.load_engines([])
|
||||||
|
|
||||||
def _get_params(self, online_processor, search_query, engine_category):
|
def _get_params(self, online_processor, search_query, engine_category):
|
||||||
params = online_processor.get_params(search_query, engine_category)
|
params = online_processor.get_params(search_query, engine_category)
|
||||||
|
|
|
@@ -2,26 +2,11 @@
|
||||||
# pylint: disable=missing-module-docstring
|
# pylint: disable=missing-module-docstring
|
||||||
|
|
||||||
from unittest.mock import MagicMock, Mock
|
from unittest.mock import MagicMock, Mock
|
||||||
from searx.engines import load_engines, mariadb_server
|
from searx.engines import mariadb_server
|
||||||
from tests import SearxTestCase
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
class MariadbServerTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
class MariadbServerTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
def setUp(self):
|
|
||||||
load_engines(
|
|
||||||
[
|
|
||||||
{
|
|
||||||
'name': 'mariadb server',
|
|
||||||
'engine': 'mariadb_server',
|
|
||||||
'shortcut': 'mdb',
|
|
||||||
'timeout': 9.0,
|
|
||||||
'disabled': True,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
def tearDown(self):
|
|
||||||
load_engines([])
|
|
||||||
|
|
||||||
def test_init_no_query_str_raises(self):
|
def test_init_no_query_str_raises(self):
|
||||||
self.assertRaises(ValueError, lambda: mariadb_server.init({}))
|
self.assertRaises(ValueError, lambda: mariadb_server.init({}))
|
||||||
|
|
|
@@ -1,28 +1,34 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# pylint: disable=missing-module-docstring
|
# pylint: disable=missing-module-docstring
|
||||||
|
|
||||||
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
from requests import HTTPError
|
from requests import HTTPError
|
||||||
from parameterized import parameterized
|
from parameterized import parameterized
|
||||||
from searx.engines import load_engines, tineye
|
import searx.search
|
||||||
|
import searx.engines
|
||||||
from tests import SearxTestCase
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
load_engines([{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}])
|
searx.search.initialize(
|
||||||
|
[{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}]
|
||||||
|
)
|
||||||
|
|
||||||
|
self.tineye = searx.engines.engines['tineye']
|
||||||
|
self.tineye.logger.setLevel(logging.CRITICAL)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
load_engines([])
|
searx.search.load_engines([])
|
||||||
|
|
||||||
def test_status_code_raises(self):
|
def test_status_code_raises(self):
|
||||||
response = Mock()
|
response = Mock()
|
||||||
response.status_code = 401
|
response.status_code = 401
|
||||||
response.raise_for_status.side_effect = HTTPError()
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
self.assertRaises(HTTPError, lambda: tineye.response(response))
|
self.assertRaises(HTTPError, lambda: self.tineye.response(response))
|
||||||
|
|
||||||
@parameterized.expand([(400), (422)])
|
@parameterized.expand([(400), (422)])
|
||||||
def test_returns_empty_list(self, status_code):
|
def test_returns_empty_list(self, status_code):
|
||||||
|
@@ -30,7 +36,7 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
response.json.return_value = {}
|
response.json.return_value = {}
|
||||||
response.status_code = status_code
|
response.status_code = status_code
|
||||||
response.raise_for_status.side_effect = HTTPError()
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
results = tineye.response(response)
|
results = self.tineye.response(response)
|
||||||
self.assertEqual(0, len(results))
|
self.assertEqual(0, len(results))
|
||||||
|
|
||||||
def test_logs_format_for_422(self):
|
def test_logs_format_for_422(self):
|
||||||
|
@@ -39,9 +45,9 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
response.status_code = 422
|
response.status_code = 422
|
||||||
response.raise_for_status.side_effect = HTTPError()
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
|
||||||
with self.assertLogs(tineye.logger) as assert_logs_context:
|
with self.assertLogs(self.tineye.logger) as assert_logs_context:
|
||||||
tineye.response(response)
|
self.tineye.response(response)
|
||||||
self.assertIn(tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output))
|
self.assertIn(self.tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output))
|
||||||
|
|
||||||
def test_logs_signature_for_422(self):
|
def test_logs_signature_for_422(self):
|
||||||
response = Mock()
|
response = Mock()
|
||||||
|
@@ -49,9 +55,9 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
response.status_code = 422
|
response.status_code = 422
|
||||||
response.raise_for_status.side_effect = HTTPError()
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
|
||||||
with self.assertLogs(tineye.logger) as assert_logs_context:
|
with self.assertLogs(self.tineye.logger) as assert_logs_context:
|
||||||
tineye.response(response)
|
self.tineye.response(response)
|
||||||
self.assertIn(tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output))
|
self.assertIn(self.tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output))
|
||||||
|
|
||||||
def test_logs_download_for_422(self):
|
def test_logs_download_for_422(self):
|
||||||
response = Mock()
|
response = Mock()
|
||||||
|
@@ -59,9 +65,9 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
response.status_code = 422
|
response.status_code = 422
|
||||||
response.raise_for_status.side_effect = HTTPError()
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
|
||||||
with self.assertLogs(tineye.logger) as assert_logs_context:
|
with self.assertLogs(self.tineye.logger) as assert_logs_context:
|
||||||
tineye.response(response)
|
self.tineye.response(response)
|
||||||
self.assertIn(tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output))
|
self.assertIn(self.tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output))
|
||||||
|
|
||||||
def test_logs_description_for_400(self):
|
def test_logs_description_for_400(self):
|
||||||
description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645'
|
description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645'
|
||||||
|
@@ -70,8 +76,8 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
response.status_code = 400
|
response.status_code = 400
|
||||||
response.raise_for_status.side_effect = HTTPError()
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
|
||||||
with self.assertLogs(tineye.logger) as assert_logs_context:
|
with self.assertLogs(self.tineye.logger) as assert_logs_context:
|
||||||
tineye.response(response)
|
self.tineye.response(response)
|
||||||
self.assertIn(description, ','.join(assert_logs_context.output))
|
self.assertIn(description, ','.join(assert_logs_context.output))
|
||||||
|
|
||||||
def test_crawl_date_parses(self):
|
def test_crawl_date_parses(self):
|
||||||
|
@@ -90,5 +96,5 @@ class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
response.status_code = 200
|
response.status_code = 200
|
||||||
results = tineye.response(response)
|
results = self.tineye.response(response)
|
||||||
self.assertEqual(date, results[0]['publishedDate'])
|
self.assertEqual(date, results[0]['publishedDate'])
|
||||||
|
|
|
@@ -2,7 +2,7 @@
|
||||||
# pylint: disable=missing-module-docstring
|
# pylint: disable=missing-module-docstring
|
||||||
|
|
||||||
from parameterized.parameterized import parameterized
|
from parameterized.parameterized import parameterized
|
||||||
from searx.engines import load_engines
|
import searx.search
|
||||||
from searx.query import RawTextQuery
|
from searx.query import RawTextQuery
|
||||||
from tests import SearxTestCase
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
@@ -218,10 +218,10 @@ class TestBang(SearxTestCase): # pylint:disable=missing-class-docstring
|
||||||
THE_QUERY = 'the query'
|
THE_QUERY = 'the query'
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
load_engines(TEST_ENGINES)
|
searx.search.initialize(TEST_ENGINES)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
load_engines([])
|
searx.search.load_engines([])
|
||||||
|
|
||||||
@parameterized.expand(SPECIFIC_BANGS)
|
@parameterized.expand(SPECIFIC_BANGS)
|
||||||
def test_bang(self, bang: str):
|
def test_bang(self, bang: str):
|
||||||
|
|
|
@@ -2,9 +2,26 @@
|
||||||
# pylint: disable=missing-module-docstring
|
# pylint: disable=missing-module-docstring
|
||||||
|
|
||||||
from searx.results import ResultContainer
|
from searx.results import ResultContainer
|
||||||
|
import searx.search
|
||||||
from tests import SearxTestCase
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
def make_test_engine_dict(**kwargs) -> dict:
|
||||||
|
test_engine = {
|
||||||
|
# fmt: off
|
||||||
|
'name': None,
|
||||||
|
'engine': None,
|
||||||
|
'categories': 'general',
|
||||||
|
'shortcut': 'dummy',
|
||||||
|
'timeout': 3.0,
|
||||||
|
'tokens': [],
|
||||||
|
# fmt: on
|
||||||
|
}
|
||||||
|
|
||||||
|
test_engine.update(**kwargs)
|
||||||
|
return test_engine
|
||||||
|
|
||||||
|
|
||||||
def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs):
|
def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs):
|
||||||
result = {
|
result = {
|
||||||
# fmt: off
|
# fmt: off
|
||||||
|
@@ -19,23 +36,40 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', eng
|
||||||
|
|
||||||
|
|
||||||
class ResultContainerTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
|
class ResultContainerTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
|
|
||||||
|
def setUp(self) -> None:
|
||||||
|
stract_engine = make_test_engine_dict(name="stract", engine="stract", shortcut="stra")
|
||||||
|
duckduckgo_engine = make_test_engine_dict(name="duckduckgo", engine="duckduckgo", shortcut="ddg")
|
||||||
|
mojeek_engine = make_test_engine_dict(name="mojeek", engine="mojeek", shortcut="mjk")
|
||||||
|
searx.search.initialize([stract_engine, duckduckgo_engine, mojeek_engine])
|
||||||
|
self.container = ResultContainer()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
searx.search.load_engines([])
|
||||||
|
|
||||||
def test_empty(self):
|
def test_empty(self):
|
||||||
c = ResultContainer()
|
self.assertEqual(self.container.get_ordered_results(), [])
|
||||||
self.assertEqual(c.get_ordered_results(), [])
|
|
||||||
|
|
||||||
def test_one_result(self):
|
def test_one_result(self):
|
||||||
c = ResultContainer()
|
self.container.extend('wikipedia', [fake_result()])
|
||||||
c.extend('wikipedia', [fake_result()])
|
|
||||||
self.assertEqual(c.results_length(), 1)
|
self.assertEqual(self.container.results_length(), 1)
|
||||||
|
|
||||||
def test_one_suggestion(self):
|
def test_one_suggestion(self):
|
||||||
c = ResultContainer()
|
self.container.extend('wikipedia', [fake_result(suggestion=True)])
|
||||||
c.extend('wikipedia', [fake_result(suggestion=True)])
|
|
||||||
self.assertEqual(len(c.suggestions), 1)
|
self.assertEqual(len(self.container.suggestions), 1)
|
||||||
self.assertEqual(c.results_length(), 0)
|
self.assertEqual(self.container.results_length(), 0)
|
||||||
|
|
||||||
def test_result_merge(self):
|
def test_result_merge(self):
|
||||||
c = ResultContainer()
|
self.container.extend('wikipedia', [fake_result()])
|
||||||
c.extend('wikipedia', [fake_result()])
|
self.container.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
|
||||||
c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
|
|
||||||
self.assertEqual(c.results_length(), 2)
|
self.assertEqual(self.container.results_length(), 2)
|
||||||
|
|
||||||
|
def test_result_merge_by_title(self):
|
||||||
|
self.container.extend('stract', [fake_result(engine='stract', title='short title')])
|
||||||
|
self.container.extend('duckduckgo', [fake_result(engine='duckduckgo', title='normal title')])
|
||||||
|
self.container.extend('mojeek', [fake_result(engine='mojeek', title='this long long title')])
|
||||||
|
|
||||||
|
self.assertEqual(self.container.get_ordered_results()[0].get('title', ''), 'this long long title')
|
||||||
|
|
Loading…
Reference in New Issue