mirror of https://github.com/searxng/searxng.git
Merge branch 'searxng:master' into elasticsearch-custom-query
This commit is contained in:
commit
3d139086c1
|
@ -6,7 +6,7 @@ from urllib.parse import urlencode
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
from dateutil.relativedelta import relativedelta
|
from dateutil.relativedelta import relativedelta
|
||||||
from searx.utils import eval_xpath, eval_xpath_list, extract_text, gen_useragent
|
from searx.utils import eval_xpath, eval_xpath_list, extract_text
|
||||||
|
|
||||||
about = {
|
about = {
|
||||||
'website': 'https://mojeek.com',
|
'website': 'https://mojeek.com',
|
||||||
|
@ -63,7 +63,6 @@ def request(query, params):
|
||||||
logger.debug(args["since"])
|
logger.debug(args["since"])
|
||||||
|
|
||||||
params['url'] = f"{base_url}/search?{urlencode(args)}"
|
params['url'] = f"{base_url}/search?{urlencode(args)}"
|
||||||
params['headers'] = {'User-Agent': gen_useragent()}
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# pylint: disable=missing-module-docstring
|
|
@ -21,7 +21,7 @@ from searx.engines import command as command_engine
|
||||||
from tests import SearxTestCase
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestCommandEngine(SearxTestCase):
|
class TestCommandEngine(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
def test_basic_seq_command_engine(self):
|
def test_basic_seq_command_engine(self):
|
||||||
ls_engine = command_engine
|
ls_engine = command_engine
|
||||||
ls_engine.command = ['seq', '{{QUERY}}']
|
ls_engine.command = ['seq', '{{QUERY}}']
|
||||||
|
@ -33,10 +33,10 @@ class TestCommandEngine(SearxTestCase):
|
||||||
{'number': '4', 'template': 'key-value.html'},
|
{'number': '4', 'template': 'key-value.html'},
|
||||||
{'number': '5', 'template': 'key-value.html'},
|
{'number': '5', 'template': 'key-value.html'},
|
||||||
]
|
]
|
||||||
results = ls_engine.search('5'.encode('utf-8'), {'pageno': 1})
|
results = ls_engine.search('5', {'pageno': 1})
|
||||||
self.assertEqual(results, expected_results)
|
self.assertEqual(results, expected_results)
|
||||||
|
|
||||||
def test_delimiter_parsing_command_engine(self):
|
def test_delimiter_parsing(self):
|
||||||
searx_logs = '''DEBUG:searx.webapp:static directory is /home/n/p/searx/searx/static
|
searx_logs = '''DEBUG:searx.webapp:static directory is /home/n/p/searx/searx/static
|
||||||
DEBUG:searx.webapp:templates directory is /home/n/p/searx/searx/templates
|
DEBUG:searx.webapp:templates directory is /home/n/p/searx/searx/templates
|
||||||
DEBUG:searx.engines:soundcloud engine: Starting background initialization
|
DEBUG:searx.engines:soundcloud engine: Starting background initialization
|
||||||
|
@ -140,10 +140,10 @@ INFO:werkzeug: * Debugger PIN: 299-578-362'''
|
||||||
]
|
]
|
||||||
|
|
||||||
for i in [0, 1]:
|
for i in [0, 1]:
|
||||||
results = echo_engine.search(''.encode('utf-8'), {'pageno': i + 1})
|
results = echo_engine.search('', {'pageno': i + 1})
|
||||||
self.assertEqual(results, expected_results_by_page[i])
|
self.assertEqual(results, expected_results_by_page[i])
|
||||||
|
|
||||||
def test_regex_parsing_command_engine(self):
|
def test_regex_parsing(self):
|
||||||
txt = '''commit 35f9a8c81d162a361b826bbcd4a1081a4fbe76a7
|
txt = '''commit 35f9a8c81d162a361b826bbcd4a1081a4fbe76a7
|
||||||
Author: Noémi Ványi <sitbackandwait@gmail.com>
|
Author: Noémi Ványi <sitbackandwait@gmail.com>
|
||||||
Date: Tue Oct 15 11:31:33 2019 +0200
|
Date: Tue Oct 15 11:31:33 2019 +0200
|
||||||
|
@ -168,11 +168,12 @@ commit '''
|
||||||
git_log_engine.result_separator = '\n\ncommit '
|
git_log_engine.result_separator = '\n\ncommit '
|
||||||
git_log_engine.delimiter = {}
|
git_log_engine.delimiter = {}
|
||||||
git_log_engine.parse_regex = {
|
git_log_engine.parse_regex = {
|
||||||
'commit': '\w{40}',
|
'commit': r'\w{40}',
|
||||||
'author': '[\w* ]* <\w*@?\w*\.?\w*>',
|
'author': r'[\w* ]* <\w*@?\w*\.?\w*>',
|
||||||
'date': 'Date: .*',
|
'date': r'Date: .*',
|
||||||
'message': '\n\n.*$',
|
'message': r'\n\n.*$',
|
||||||
}
|
}
|
||||||
|
git_log_engine.init({"command": git_log_engine.command, "parse_regex": git_log_engine.parse_regex})
|
||||||
expected_results = [
|
expected_results = [
|
||||||
{
|
{
|
||||||
'commit': '35f9a8c81d162a361b826bbcd4a1081a4fbe76a7',
|
'commit': '35f9a8c81d162a361b826bbcd4a1081a4fbe76a7',
|
||||||
|
@ -197,7 +198,7 @@ commit '''
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
results = git_log_engine.search(''.encode('utf-8'), {'pageno': 1})
|
results = git_log_engine.search('', {'pageno': 1})
|
||||||
self.assertEqual(results, expected_results)
|
self.assertEqual(results, expected_results)
|
||||||
|
|
||||||
def test_working_dir_path_query(self):
|
def test_working_dir_path_query(self):
|
||||||
|
@ -207,7 +208,7 @@ commit '''
|
||||||
ls_engine.delimiter = {'chars': ' ', 'keys': ['file']}
|
ls_engine.delimiter = {'chars': ' ', 'keys': ['file']}
|
||||||
ls_engine.query_type = 'path'
|
ls_engine.query_type = 'path'
|
||||||
|
|
||||||
results = ls_engine.search('.'.encode(), {'pageno': 1})
|
results = ls_engine.search('.', {'pageno': 1})
|
||||||
self.assertTrue(len(results) != 0)
|
self.assertTrue(len(results) != 0)
|
||||||
|
|
||||||
forbidden_paths = [
|
forbidden_paths = [
|
||||||
|
@ -218,7 +219,7 @@ commit '''
|
||||||
'/var',
|
'/var',
|
||||||
]
|
]
|
||||||
for forbidden_path in forbidden_paths:
|
for forbidden_path in forbidden_paths:
|
||||||
self.assertRaises(ValueError, ls_engine.search, '..'.encode(), {'pageno': 1})
|
self.assertRaises(ValueError, ls_engine.search, forbidden_path, {'pageno': 1})
|
||||||
|
|
||||||
def test_enum_queries(self):
|
def test_enum_queries(self):
|
||||||
echo_engine = command_engine
|
echo_engine = command_engine
|
||||||
|
@ -227,7 +228,7 @@ commit '''
|
||||||
echo_engine.query_enum = ['i-am-allowed-to-say-this', 'and-that']
|
echo_engine.query_enum = ['i-am-allowed-to-say-this', 'and-that']
|
||||||
|
|
||||||
for allowed in echo_engine.query_enum:
|
for allowed in echo_engine.query_enum:
|
||||||
results = echo_engine.search(allowed.encode(), {'pageno': 1})
|
results = echo_engine.search(allowed, {'pageno': 1})
|
||||||
self.assertTrue(len(results) != 0)
|
self.assertTrue(len(results) != 0)
|
||||||
|
|
||||||
forbidden_queries = [
|
forbidden_queries = [
|
||||||
|
@ -236,4 +237,4 @@ commit '''
|
||||||
'prohibited',
|
'prohibited',
|
||||||
]
|
]
|
||||||
for forbidden in forbidden_queries:
|
for forbidden in forbidden_queries:
|
||||||
self.assertRaises(ValueError, echo_engine.search, forbidden.encode(), {'pageno': 1})
|
self.assertRaises(ValueError, echo_engine.search, forbidden, {'pageno': 1})
|
||||||
|
|
|
@ -7,25 +7,43 @@ from searx.engines import xpath
|
||||||
from tests import SearxTestCase
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestXpathEngine(SearxTestCase):
|
class TestXpathEngine(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
|
html = """
|
||||||
|
<div>
|
||||||
|
<div class="search_result">
|
||||||
|
<a class="result" href="https://result1.com">Result 1</a>
|
||||||
|
<p class="content">Content 1</p>
|
||||||
|
<a class="cached" href="https://cachedresult1.com">Cache</a>
|
||||||
|
</div>
|
||||||
|
<div class="search_result">
|
||||||
|
<a class="result" href="https://result2.com">Result 2</a>
|
||||||
|
<p class="content">Content 2</p>
|
||||||
|
<a class="cached" href="https://cachedresult2.com">Cache</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
def test_request(self):
|
def test_request(self):
|
||||||
xpath.search_url = 'https://url.com/{query}'
|
xpath.search_url = 'https://url.com/{query}'
|
||||||
xpath.categories = []
|
xpath.categories = []
|
||||||
xpath.paging = False
|
xpath.paging = False
|
||||||
query = 'test_query'
|
query = 'test_query'
|
||||||
dicto = defaultdict(dict)
|
dicto = defaultdict(dict)
|
||||||
|
dicto['language'] = 'all'
|
||||||
|
dicto['pageno'] = 1
|
||||||
params = xpath.request(query, dicto)
|
params = xpath.request(query, dicto)
|
||||||
self.assertIn('url', params)
|
self.assertIn('url', params)
|
||||||
self.assertEquals('https://url.com/test_query', params['url'])
|
self.assertEqual('https://url.com/test_query', params['url'])
|
||||||
|
|
||||||
xpath.search_url = 'https://url.com/q={query}&p={pageno}'
|
xpath.search_url = 'https://url.com/q={query}&p={pageno}'
|
||||||
xpath.paging = True
|
xpath.paging = True
|
||||||
query = 'test_query'
|
query = 'test_query'
|
||||||
dicto = defaultdict(dict)
|
dicto = defaultdict(dict)
|
||||||
|
dicto['language'] = 'all'
|
||||||
dicto['pageno'] = 1
|
dicto['pageno'] = 1
|
||||||
params = xpath.request(query, dicto)
|
params = xpath.request(query, dicto)
|
||||||
self.assertIn('url', params)
|
self.assertIn('url', params)
|
||||||
self.assertEquals('https://url.com/q=test_query&p=1', params['url'])
|
self.assertEqual('https://url.com/q=test_query&p=1', params['url'])
|
||||||
|
|
||||||
def test_response(self):
|
def test_response(self):
|
||||||
# without results_xpath
|
# without results_xpath
|
||||||
|
@ -38,24 +56,10 @@ class TestXpathEngine(SearxTestCase):
|
||||||
self.assertRaises(AttributeError, xpath.response, '')
|
self.assertRaises(AttributeError, xpath.response, '')
|
||||||
self.assertRaises(AttributeError, xpath.response, '[]')
|
self.assertRaises(AttributeError, xpath.response, '[]')
|
||||||
|
|
||||||
response = mock.Mock(text='<html></html>')
|
response = mock.Mock(text='<html></html>', status_code=200)
|
||||||
self.assertEqual(xpath.response(response), [])
|
self.assertEqual(xpath.response(response), [])
|
||||||
|
|
||||||
html = u"""
|
response = mock.Mock(text=self.html, status_code=200)
|
||||||
<div>
|
|
||||||
<div class="search_result">
|
|
||||||
<a class="result" href="https://result1.com">Result 1</a>
|
|
||||||
<p class="content">Content 1</p>
|
|
||||||
<a class="cached" href="https://cachedresult1.com">Cache</a>
|
|
||||||
</div>
|
|
||||||
<div class="search_result">
|
|
||||||
<a class="result" href="https://result2.com">Result 2</a>
|
|
||||||
<p class="content">Content 2</p>
|
|
||||||
<a class="cached" href="https://cachedresult2.com">Cache</a>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
"""
|
|
||||||
response = mock.Mock(text=html)
|
|
||||||
results = xpath.response(response)
|
results = xpath.response(response)
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 2)
|
self.assertEqual(len(results), 2)
|
||||||
|
@ -80,6 +84,7 @@ class TestXpathEngine(SearxTestCase):
|
||||||
results = xpath.response(response)
|
results = xpath.response(response)
|
||||||
self.assertTrue(results[0]['is_onion'])
|
self.assertTrue(results[0]['is_onion'])
|
||||||
|
|
||||||
|
def test_response_results_xpath(self):
|
||||||
# with results_xpath
|
# with results_xpath
|
||||||
xpath.results_xpath = '//div[@class="search_result"]'
|
xpath.results_xpath = '//div[@class="search_result"]'
|
||||||
xpath.url_xpath = './/a[@class="result"]/@href'
|
xpath.url_xpath = './/a[@class="result"]/@href'
|
||||||
|
@ -93,10 +98,10 @@ class TestXpathEngine(SearxTestCase):
|
||||||
self.assertRaises(AttributeError, xpath.response, '')
|
self.assertRaises(AttributeError, xpath.response, '')
|
||||||
self.assertRaises(AttributeError, xpath.response, '[]')
|
self.assertRaises(AttributeError, xpath.response, '[]')
|
||||||
|
|
||||||
response = mock.Mock(text='<html></html>')
|
response = mock.Mock(text='<html></html>', status_code=200)
|
||||||
self.assertEqual(xpath.response(response), [])
|
self.assertEqual(xpath.response(response), [])
|
||||||
|
|
||||||
response = mock.Mock(text=html)
|
response = mock.Mock(text=self.html, status_code=200)
|
||||||
results = xpath.response(response)
|
results = xpath.response(response)
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 2)
|
self.assertEqual(len(results), 2)
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# pylint: disable=missing-module-docstring
|
|
@ -0,0 +1,53 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# pylint: disable=missing-module-docstring
|
||||||
|
|
||||||
|
from searx.search import SearchQuery, EngineRef
|
||||||
|
from searx.search.processors import online
|
||||||
|
from searx.engines import load_engines
|
||||||
|
from searx import engines
|
||||||
|
|
||||||
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
TEST_ENGINE_NAME = 'dummy engine'
|
||||||
|
TEST_ENGINE = {
|
||||||
|
'name': TEST_ENGINE_NAME,
|
||||||
|
'engine': 'dummy',
|
||||||
|
'categories': 'general',
|
||||||
|
'shortcut': 'du',
|
||||||
|
'timeout': 3.0,
|
||||||
|
'tokens': [],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
load_engines([TEST_ENGINE])
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
load_engines([])
|
||||||
|
|
||||||
|
def _get_params(self, online_processor, search_query, engine_category):
|
||||||
|
params = online_processor.get_params(search_query, engine_category)
|
||||||
|
self.assertIsNotNone(params)
|
||||||
|
assert params is not None
|
||||||
|
return params
|
||||||
|
|
||||||
|
def test_get_params_default_params(self):
|
||||||
|
engine = engines.engines[TEST_ENGINE_NAME]
|
||||||
|
online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME)
|
||||||
|
search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None)
|
||||||
|
params = self._get_params(online_processor, search_query, 'general')
|
||||||
|
self.assertIn('method', params)
|
||||||
|
self.assertIn('headers', params)
|
||||||
|
self.assertIn('data', params)
|
||||||
|
self.assertIn('url', params)
|
||||||
|
self.assertIn('cookies', params)
|
||||||
|
self.assertIn('auth', params)
|
||||||
|
|
||||||
|
def test_get_params_useragent(self):
|
||||||
|
engine = engines.engines[TEST_ENGINE_NAME]
|
||||||
|
online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME)
|
||||||
|
search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None)
|
||||||
|
params = self._get_params(online_processor, search_query, 'general')
|
||||||
|
self.assertIn('User-Agent', params['headers'])
|
|
@ -247,6 +247,7 @@ class TestBang(SearxTestCase): # pylint:disable=missing-class-docstring
|
||||||
self.assertEqual(query.user_query_parts, TestBang.THE_QUERY.split(' '))
|
self.assertEqual(query.user_query_parts, TestBang.THE_QUERY.split(' '))
|
||||||
|
|
||||||
def test_specific(self):
|
def test_specific(self):
|
||||||
|
load_engines(TEST_ENGINES)
|
||||||
for bang in TestBang.SPECIFIC_BANGS:
|
for bang in TestBang.SPECIFIC_BANGS:
|
||||||
with self.subTest(msg="Check bang is specific", bang=bang):
|
with self.subTest(msg="Check bang is specific", bang=bang):
|
||||||
query_text = TestBang.THE_QUERY + ' ' + bang
|
query_text = TestBang.THE_QUERY + ' ' + bang
|
||||||
|
|
Loading…
Reference in New Issue