From bc5068feceb668edb8fd05346b56cfe48be74469 Mon Sep 17 00:00:00 2001 From: Richard Lyons Date: Fri, 21 Jun 2024 22:28:31 +0200 Subject: [PATCH 1/3] Fix elasticsearch custom_query. --- searx/engines/elasticsearch.py | 36 +++++-- tests/unit/engines/test_elasticsearch.py | 117 +++++++++++++++++++++++ 2 files changed, 147 insertions(+), 6 deletions(-) create mode 100644 tests/unit/engines/test_elasticsearch.py diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py index c721114a7..58b5ba3cc 100644 --- a/searx/engines/elasticsearch.py +++ b/searx/engines/elasticsearch.py @@ -135,14 +135,38 @@ def _terms_query(query): def _custom_query(query): - key, value = query.split(':') - custom_query = custom_query_json + key = value = None + if any(placeholder in custom_query_json for placeholder in ["{{KEY}}", "{{VALUE}}", "{{VALUES}}"]): + try: + key, value = query.split(':', maxsplit=1) + except Exception as e: + raise ValueError('query format must be "key:value"') from e + if not key: + raise ValueError('empty key from "key:value" query') + try: + custom_query = loads(custom_query_json) + except Exception as e: + raise ValueError('invalid custom_query string') from e + return _custom_query_r(query, key, value, custom_query) + + +def _custom_query_r(query, key, value, custom_query): + new_query = {} for query_key, query_value in custom_query.items(): if query_key == '{{KEY}}': - custom_query[key] = custom_query.pop(query_key) - if query_value == '{{VALUE}}': - custom_query[query_key] = value - return custom_query + query_key = key + + if isinstance(query_value, dict): + query_value = _custom_query_r(query, key, value, query_value) + elif query_value == '{{VALUE}}': + query_value = value + elif query_value == '{{VALUES}}': + query_value = value.split(',') + elif query_value == '{{QUERY}}': + query_value = query + + new_query[query_key] = query_value + return new_query def response(resp): diff --git a/tests/unit/engines/test_elasticsearch.py b/tests/unit/engines/test_elasticsearch.py new file mode 100644 index 000000000..5875d9425 --- /dev/null +++ b/tests/unit/engines/test_elasticsearch.py @@ -0,0 +1,117 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring + +''' +searx is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +searx is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with searx. If not, see < http://www.gnu.org/licenses/ >. + +''' + +from json import loads +from searx.engines import elasticsearch as elasticsearch_engine +from tests import SearxTestCase + + +class TestElasticsearchEngine(SearxTestCase): # pylint: disable=missing-class-docstring + default_params = {"headers": {}} + + def test_url_settings(self): + elasticsearch_engine.base_url = 'http://es:12345' + elasticsearch_engine.index = 'index' + params = elasticsearch_engine.request("city:berlin", self.default_params) + self.assertEqual(params["url"], "http://es:12345/index/_search") + + def test_basic_queries(self): + queries = [ + ['match', 'field:stuff', '{"query": {"match": {"field": {"query": "stuff"}}}}'], + ['simple_query_string', 'stuff', '{"query": {"simple_query_string": {"query": "stuff"}}}'], + ['term', 'field:stuff', '{"query": {"term": {"field": "stuff"}}}'], + ['terms', 'field:stuff1,stuff2', '{"query": {"terms": {"field": ["stuff1", "stuff2"]}}}'], + ] + + for query in queries: + elasticsearch_engine.query_type = query[0] + params = elasticsearch_engine.request(query[1], self.default_params) + self.assertEqual(loads(params["data"]), loads(query[2])) + + def test_basic_failures(self): + queries = [ + ['match', 'stuff'], + ['term', 'stuff'], + ['terms', 'stuff'], + ] + + for query in queries: + elasticsearch_engine.query_type = query[0] + self.assertRaises(ValueError, elasticsearch_engine.request, query[1], self.default_params) + + def test_custom_queries(self): + queries = [ + [ + 'field:stuff', + '{"query": {"match": {"{{KEY}}": {"query": "{{VALUE}}"}}}}', + '{"query": {"match": {"field": {"query": "stuff"}}}}', + ], + [ + 'stuff', + '{"query": {"simple_query_string": {"query": "{{QUERY}}"}}}', + '{"query": {"simple_query_string": {"query": "stuff"}}}', + ], + [ + 'more:stuff', + '{"query": {"simple_query_string": {"query": "{{QUERY}}"}}}', + '{"query": {"simple_query_string": {"query": "more:stuff"}}}', + ], + [ + 'field:stuff', + '{"query": {"term": {"{{KEY}}": "{{VALUE}}"}}}', + '{"query": {"term": {"field": "stuff"}}}', + ], + [ + 'field:more:stuff', + '{"query": {"match": {"{{KEY}}": {"query": "{{VALUE}}"}}}}', + '{"query": {"match": {"field": {"query": "more:stuff"}}}}', + ], + [ + 'field:stuff1,stuff2', + '{"query": {"terms": {"{{KEY}}": "{{VALUES}}"}}}', + '{"query": {"terms": {"field": ["stuff1", "stuff2"]}}}', + ], + [ + 'field:stuff1', + '{"query": {"terms": {"{{KEY}}": "{{VALUES}}"}}}', + '{"query": {"terms": {"field": ["stuff1"]}}}', + ], + ] + + elasticsearch_engine.query_type = 'custom' + for query in queries: + elasticsearch_engine.custom_query_json = query[1] + params = elasticsearch_engine.request(query[0], self.default_params) + self.assertEqual(loads(params["data"]), loads(query[2])) + + def test_custom_failures(self): + queries = [ + ['stuff', '{"query": {"match": {"{{KEY}}": {"query": "{{VALUE}}"}}}}'], + ['stuff', '{"query": {"terms": {"{{KEY}}": "{{VALUES}}"}}}'], + ['stuff', '{"query": {"simple_query_string": {"query": {{QUERY}}}}}'], + ['stuff', '"query": {"simple_query_string": {"query": "{{QUERY}}"}}}'], + ] + + elasticsearch_engine.query_type = 'custom' + for query in queries: + elasticsearch_engine.custom_query_json = query[1] + self.assertRaises(ValueError, elasticsearch_engine.request, query[0], self.default_params) + + +# vi:sw=4 From 7897ad1d5a552699ddb74f7039492cd4036509c2 Mon Sep 17 00:00:00 2001 From: Richard Lyons Date: Sat, 22 Jun 2024 13:30:49 +0200 Subject: [PATCH 2/3] Add tests for special characters [ '"]. --- tests/unit/engines/test_elasticsearch.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit/engines/test_elasticsearch.py b/tests/unit/engines/test_elasticsearch.py index 5875d9425..2c249afab 100644 --- a/tests/unit/engines/test_elasticsearch.py +++ b/tests/unit/engines/test_elasticsearch.py @@ -67,6 +67,21 @@ class TestElasticsearchEngine(SearxTestCase): # pylint: disable=missing-class-d '{"query": {"simple_query_string": {"query": "{{QUERY}}"}}}', '{"query": {"simple_query_string": {"query": "stuff"}}}', ], + [ + 'space stuff', + '{"query": {"simple_query_string": {"query": "{{QUERY}}"}}}', + '{"query": {"simple_query_string": {"query": "space stuff"}}}', + ], + [ + '"space stuff"', + '{"query": {"simple_query_string": {"query": "{{QUERY}}"}}}', + '{"query": {"simple_query_string": {"query": "\\\"space stuff\\\""}}}', + ], + [ + "embedded'apostrophe", + '{"query": {"simple_query_string": {"query": "{{QUERY}}"}}}', + '{"query": {"simple_query_string": {"query": "embedded\'apostrophe"}}}', + ], [ 'more:stuff', '{"query": {"simple_query_string": {"query": "{{QUERY}}"}}}', From 4c80c2458af3faf4e4574d712ecb1b32f80a10f3 Mon Sep 17 00:00:00 2001 From: Richard Lyons Date: Sat, 22 Jun 2024 14:02:28 +0200 Subject: [PATCH 3/3] Check exception context for actual error message. --- tests/unit/engines/test_elasticsearch.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/unit/engines/test_elasticsearch.py b/tests/unit/engines/test_elasticsearch.py index 2c249afab..9749a132c 100644 --- a/tests/unit/engines/test_elasticsearch.py +++ b/tests/unit/engines/test_elasticsearch.py @@ -46,14 +46,16 @@ class TestElasticsearchEngine(SearxTestCase): # pylint: disable=missing-class-d def test_basic_failures(self): queries = [ - ['match', 'stuff'], - ['term', 'stuff'], - ['terms', 'stuff'], + ['match', 'stuff', 'query format must be "key:value'], + ['term', 'stuff', 'query format must be key:value'], + ['terms', 'stuff', 'query format must be key:value1,value2'], ] for query in queries: elasticsearch_engine.query_type = query[0] - self.assertRaises(ValueError, elasticsearch_engine.request, query[1], self.default_params) + with self.assertRaises(ValueError) as context: + elasticsearch_engine.request(query[1], self.default_params) + self.assertIn(query[2], str(context.exception)) def test_custom_queries(self): queries = [ @@ -117,16 +119,18 @@ class TestElasticsearchEngine(SearxTestCase): # pylint: disable=missing-class-d def test_custom_failures(self): queries = [ - ['stuff', '{"query": {"match": {"{{KEY}}": {"query": "{{VALUE}}"}}}}'], - ['stuff', '{"query": {"terms": {"{{KEY}}": "{{VALUES}}"}}}'], - ['stuff', '{"query": {"simple_query_string": {"query": {{QUERY}}}}}'], - ['stuff', '"query": {"simple_query_string": {"query": "{{QUERY}}"}}}'], + ['stuff', '{"query": {"match": {"{{KEY}}": {"query": "{{VALUE}}"}}}}', 'query format must be "key:value"'], + ['stuff', '{"query": {"terms": {"{{KEY}}": "{{VALUES}}"}}}', 'query format must be "key:value"'], + ['stuff', '{"query": {"simple_query_string": {"query": {{QUERY}}}}}', 'invalid custom_query string'], + ['stuff', '"query": {"simple_query_string": {"query": "{{QUERY}}"}}}', 'invalid custom_query string'], ] elasticsearch_engine.query_type = 'custom' for query in queries: elasticsearch_engine.custom_query_json = query[1] - self.assertRaises(ValueError, elasticsearch_engine.request, query[0], self.default_params) + with self.assertRaises(ValueError) as context: + elasticsearch_engine.request(query[0], self.default_params) + self.assertIn(query[2], str(context.exception)) # vi:sw=4