Merge branch 'searxng:master' into elasticsearch-custom-query

This commit is contained in:
frob 2024-06-23 14:24:06 +02:00 committed by GitHub
commit 3d139086c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 100 additions and 37 deletions

View File

@ -6,7 +6,7 @@ from urllib.parse import urlencode
from lxml import html
from dateutil.relativedelta import relativedelta
from searx.utils import eval_xpath, eval_xpath_list, extract_text, gen_useragent
from searx.utils import eval_xpath, eval_xpath_list, extract_text
about = {
'website': 'https://mojeek.com',
@ -63,7 +63,6 @@ def request(query, params):
logger.debug(args["since"])
params['url'] = f"{base_url}/search?{urlencode(args)}"
params['headers'] = {'User-Agent': gen_useragent()}
return params

View File

@ -0,0 +1,2 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring

View File

@ -21,7 +21,7 @@ from searx.engines import command as command_engine
from tests import SearxTestCase
class TestCommandEngine(SearxTestCase):
class TestCommandEngine(SearxTestCase): # pylint: disable=missing-class-docstring
def test_basic_seq_command_engine(self):
ls_engine = command_engine
ls_engine.command = ['seq', '{{QUERY}}']
@ -33,10 +33,10 @@ class TestCommandEngine(SearxTestCase):
{'number': '4', 'template': 'key-value.html'},
{'number': '5', 'template': 'key-value.html'},
]
results = ls_engine.search('5'.encode('utf-8'), {'pageno': 1})
results = ls_engine.search('5', {'pageno': 1})
self.assertEqual(results, expected_results)
def test_delimiter_parsing_command_engine(self):
def test_delimiter_parsing(self):
searx_logs = '''DEBUG:searx.webapp:static directory is /home/n/p/searx/searx/static
DEBUG:searx.webapp:templates directory is /home/n/p/searx/searx/templates
DEBUG:searx.engines:soundcloud engine: Starting background initialization
@ -140,10 +140,10 @@ INFO:werkzeug: * Debugger PIN: 299-578-362'''
]
for i in [0, 1]:
results = echo_engine.search(''.encode('utf-8'), {'pageno': i + 1})
results = echo_engine.search('', {'pageno': i + 1})
self.assertEqual(results, expected_results_by_page[i])
def test_regex_parsing_command_engine(self):
def test_regex_parsing(self):
txt = '''commit 35f9a8c81d162a361b826bbcd4a1081a4fbe76a7
Author: Noémi Ványi <sitbackandwait@gmail.com>
Date: Tue Oct 15 11:31:33 2019 +0200
@ -168,11 +168,12 @@ commit '''
git_log_engine.result_separator = '\n\ncommit '
git_log_engine.delimiter = {}
git_log_engine.parse_regex = {
'commit': '\w{40}',
'author': '[\w* ]* <\w*@?\w*\.?\w*>',
'date': 'Date: .*',
'message': '\n\n.*$',
'commit': r'\w{40}',
'author': r'[\w* ]* <\w*@?\w*\.?\w*>',
'date': r'Date: .*',
'message': r'\n\n.*$',
}
git_log_engine.init({"command": git_log_engine.command, "parse_regex": git_log_engine.parse_regex})
expected_results = [
{
'commit': '35f9a8c81d162a361b826bbcd4a1081a4fbe76a7',
@ -197,7 +198,7 @@ commit '''
},
]
results = git_log_engine.search(''.encode('utf-8'), {'pageno': 1})
results = git_log_engine.search('', {'pageno': 1})
self.assertEqual(results, expected_results)
def test_working_dir_path_query(self):
@ -207,7 +208,7 @@ commit '''
ls_engine.delimiter = {'chars': ' ', 'keys': ['file']}
ls_engine.query_type = 'path'
results = ls_engine.search('.'.encode(), {'pageno': 1})
results = ls_engine.search('.', {'pageno': 1})
self.assertTrue(len(results) != 0)
forbidden_paths = [
@ -218,7 +219,7 @@ commit '''
'/var',
]
for forbidden_path in forbidden_paths:
self.assertRaises(ValueError, ls_engine.search, '..'.encode(), {'pageno': 1})
self.assertRaises(ValueError, ls_engine.search, forbidden_path, {'pageno': 1})
def test_enum_queries(self):
echo_engine = command_engine
@ -227,7 +228,7 @@ commit '''
echo_engine.query_enum = ['i-am-allowed-to-say-this', 'and-that']
for allowed in echo_engine.query_enum:
results = echo_engine.search(allowed.encode(), {'pageno': 1})
results = echo_engine.search(allowed, {'pageno': 1})
self.assertTrue(len(results) != 0)
forbidden_queries = [
@ -236,4 +237,4 @@ commit '''
'prohibited',
]
for forbidden in forbidden_queries:
self.assertRaises(ValueError, echo_engine.search, forbidden.encode(), {'pageno': 1})
self.assertRaises(ValueError, echo_engine.search, forbidden, {'pageno': 1})

View File

@ -7,25 +7,43 @@ from searx.engines import xpath
from tests import SearxTestCase
class TestXpathEngine(SearxTestCase):
class TestXpathEngine(SearxTestCase): # pylint: disable=missing-class-docstring
html = """
<div>
<div class="search_result">
<a class="result" href="https://result1.com">Result 1</a>
<p class="content">Content 1</p>
<a class="cached" href="https://cachedresult1.com">Cache</a>
</div>
<div class="search_result">
<a class="result" href="https://result2.com">Result 2</a>
<p class="content">Content 2</p>
<a class="cached" href="https://cachedresult2.com">Cache</a>
</div>
</div>
"""
def test_request(self):
xpath.search_url = 'https://url.com/{query}'
xpath.categories = []
xpath.paging = False
query = 'test_query'
dicto = defaultdict(dict)
dicto['language'] = 'all'
dicto['pageno'] = 1
params = xpath.request(query, dicto)
self.assertIn('url', params)
self.assertEquals('https://url.com/test_query', params['url'])
self.assertEqual('https://url.com/test_query', params['url'])
xpath.search_url = 'https://url.com/q={query}&p={pageno}'
xpath.paging = True
query = 'test_query'
dicto = defaultdict(dict)
dicto['language'] = 'all'
dicto['pageno'] = 1
params = xpath.request(query, dicto)
self.assertIn('url', params)
self.assertEquals('https://url.com/q=test_query&p=1', params['url'])
self.assertEqual('https://url.com/q=test_query&p=1', params['url'])
def test_response(self):
# without results_xpath
@ -38,24 +56,10 @@ class TestXpathEngine(SearxTestCase):
self.assertRaises(AttributeError, xpath.response, '')
self.assertRaises(AttributeError, xpath.response, '[]')
response = mock.Mock(text='<html></html>')
response = mock.Mock(text='<html></html>', status_code=200)
self.assertEqual(xpath.response(response), [])
html = u"""
<div>
<div class="search_result">
<a class="result" href="https://result1.com">Result 1</a>
<p class="content">Content 1</p>
<a class="cached" href="https://cachedresult1.com">Cache</a>
</div>
<div class="search_result">
<a class="result" href="https://result2.com">Result 2</a>
<p class="content">Content 2</p>
<a class="cached" href="https://cachedresult2.com">Cache</a>
</div>
</div>
"""
response = mock.Mock(text=html)
response = mock.Mock(text=self.html, status_code=200)
results = xpath.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
@ -80,6 +84,7 @@ class TestXpathEngine(SearxTestCase):
results = xpath.response(response)
self.assertTrue(results[0]['is_onion'])
def test_response_results_xpath(self):
# with results_xpath
xpath.results_xpath = '//div[@class="search_result"]'
xpath.url_xpath = './/a[@class="result"]/@href'
@ -93,10 +98,10 @@ class TestXpathEngine(SearxTestCase):
self.assertRaises(AttributeError, xpath.response, '')
self.assertRaises(AttributeError, xpath.response, '[]')
response = mock.Mock(text='<html></html>')
response = mock.Mock(text='<html></html>', status_code=200)
self.assertEqual(xpath.response(response), [])
response = mock.Mock(text=html)
response = mock.Mock(text=self.html, status_code=200)
results = xpath.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)

View File

@ -0,0 +1,2 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring

View File

@ -0,0 +1,53 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
from searx.search import SearchQuery, EngineRef
from searx.search.processors import online
from searx.engines import load_engines
from searx import engines
from tests import SearxTestCase
# Name of the engine used by the tests below: it is the key looked up in
# ``engines.engines`` and the name given to ``EngineRef`` / ``OnlineProcessor``.
TEST_ENGINE_NAME = 'dummy engine'
# Engine settings handed to ``load_engines`` in ``setUp``.
# NOTE(review): 'engine': 'dummy' presumably selects searx's dummy engine
# module -- confirm against searx/engines.
TEST_ENGINE = {
'name': TEST_ENGINE_NAME,
'engine': 'dummy',
'categories': 'general',
'shortcut': 'du',
'timeout': 3.0,
'tokens': [],
}
class TestOnlineProcessor(SearxTestCase):  # pylint: disable=missing-class-docstring
    # Tests for the request parameters produced by ``online.OnlineProcessor``
    # for the engine described by ``TEST_ENGINE``.

    def setUp(self):
        """Load the single test engine before each test."""
        load_engines([TEST_ENGINE])

    def tearDown(self):
        """Reload the engine registry from an empty engine list after each test."""
        load_engines([])

    def _get_params(self, online_processor, search_query, engine_category):
        """Return ``online_processor.get_params(...)``, failing the test if it is ``None``."""
        result = online_processor.get_params(search_query, engine_category)
        self.assertIsNotNone(result)
        # Plain assert kept next to the unittest assertion
        # (presumably so static type checkers narrow the Optional -- confirm).
        assert result is not None
        return result

    def _params_for_test_query(self):
        """Build a processor for the test engine and return its params for a 'test' query."""
        processor = online.OnlineProcessor(engines.engines[TEST_ENGINE_NAME], TEST_ENGINE_NAME)
        engine_refs = [EngineRef(TEST_ENGINE_NAME, 'general')]
        query = SearchQuery('test', engine_refs, 'all', 0, 1, None, None, None)
        return self._get_params(processor, query, 'general')

    def test_get_params_default_params(self):
        """The params contain every default request key."""
        params = self._params_for_test_query()
        for expected_key in ('method', 'headers', 'data', 'url', 'cookies', 'auth'):
            self.assertIn(expected_key, params)

    def test_get_params_useragent(self):
        """The params' headers contain a ``User-Agent`` entry."""
        params = self._params_for_test_query()
        self.assertIn('User-Agent', params['headers'])

View File

@ -247,6 +247,7 @@ class TestBang(SearxTestCase): # pylint:disable=missing-class-docstring
self.assertEqual(query.user_query_parts, TestBang.THE_QUERY.split(' '))
def test_specific(self):
load_engines(TEST_ENGINES)
for bang in TestBang.SPECIFIC_BANGS:
with self.subTest(msg="Check bang is specific", bang=bang):
query_text = TestBang.THE_QUERY + ' ' + bang