From a665e2d19cfab702380a9c5405e8ffcf2f3ea682 Mon Sep 17 00:00:00 2001 From: Bnyro Date: Tue, 15 Oct 2024 11:06:37 +0200 Subject: [PATCH] [feat] search: support for filtering licenses --- searx/engines/__init__.py | 1 + searx/search/checker/impl.py | 1 + searx/search/models.py | 9 ++++++++- searx/search/processors/abstract.py | 4 ++++ searx/settings.yml | 1 + searx/static/themes/simple/src/js/main/search.js | 1 + searx/templates/simple/elements/apis.html | 1 + searx/templates/simple/filters/license.html | 14 ++++++++++++++ searx/templates/simple/results.html | 4 ++++ searx/templates/simple/search.html | 5 ++++- searx/webadapter.py | 11 +++++++++++ searx/webapp.py | 1 + searxng_extra/standalone_searx.py | 11 ++++++++++- tests/unit/processors/test_online.py | 8 ++++++-- tests/unit/test_search.py | 10 +++++----- 15 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 searx/templates/simple/filters/license.html diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index bcbdbe8aa..95504f15b 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -31,6 +31,7 @@ ENGINE_DEFAULT_ARGS = { "engine_type": "online", "paging": False, "time_range_support": False, + "license_filter_support": False, "safesearch": False, # settings.yml "categories": ["general"], diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index cf1f03449..38868f288 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -131,6 +131,7 @@ def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing. 
'pageno': search_query.pageno, 'safesearch': search_query.safesearch, 'time_range': search_query.time_range, + 'license_filter': search_query.license_filter, } diff --git a/searx/search/models.py b/searx/search/models.py index 62424390f..63a494faf 100644 --- a/searx/search/models.py +++ b/searx/search/models.py @@ -35,6 +35,7 @@ class SearchQuery: 'safesearch', 'pageno', 'time_range', + 'license_filter', 'timeout_limit', 'external_bang', 'engine_data', @@ -49,6 +50,7 @@ class SearchQuery: safesearch: int = 0, pageno: int = 1, time_range: typing.Optional[str] = None, + license_filter: typing.Optional[str] = None, timeout_limit: typing.Optional[float] = None, external_bang: typing.Optional[str] = None, engine_data: typing.Optional[typing.Dict[str, str]] = None, @@ -60,6 +62,7 @@ class SearchQuery: self.safesearch = safesearch self.pageno = pageno self.time_range = time_range + self.license_filter = license_filter self.timeout_limit = timeout_limit self.external_bang = external_bang self.engine_data = engine_data or {} @@ -77,13 +80,14 @@ class SearchQuery: return list(set(map(lambda engineref: engineref.category, self.engineref_list))) def __repr__(self): - return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format( + return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format( self.query, self.engineref_list, self.lang, self.safesearch, self.pageno, self.time_range, + self.license_filter, self.timeout_limit, self.external_bang, self.redirect_to_first_result, @@ -97,6 +101,7 @@ class SearchQuery: and self.safesearch == other.safesearch and self.pageno == other.pageno and self.time_range == other.time_range + and self.license_filter == other.license_filter and self.timeout_limit == other.timeout_limit and self.external_bang == other.external_bang and self.redirect_to_first_result == other.redirect_to_first_result @@ -111,6 +116,7 @@ class SearchQuery: self.safesearch, self.pageno, self.time_range, + 
self.license_filter, self.timeout_limit, self.external_bang, self.redirect_to_first_result, @@ -125,6 +131,7 @@ class SearchQuery: self.safesearch, self.pageno, self.time_range, + self.license_filter, self.timeout_limit, self.external_bang, self.engine_data, diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index f89302a92..c0cd22c5c 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -157,11 +157,15 @@ class EngineProcessor(ABC): if search_query.time_range and not self.engine.time_range_support: return None + if search_query.license_filter and not self.engine.license_filter_support: + return None + params = {} params['category'] = engine_category params['pageno'] = search_query.pageno params['safesearch'] = search_query.safesearch params['time_range'] = search_query.time_range + params['license_filter'] = search_query.license_filter params['engine_data'] = search_query.engine_data.get(self.engine_name, {}) params['searxng_locale'] = search_query.lang diff --git a/searx/settings.yml b/searx/settings.yml index 5143e69c0..caf470942 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -685,6 +685,7 @@ engines: - name: duckduckgo images engine: duckduckgo_extra categories: [images, web] + license_filter_support: true ddg_category: images shortcut: ddi disabled: true diff --git a/searx/static/themes/simple/src/js/main/search.js b/searx/static/themes/simple/src/js/main/search.js index 46756507e..ac6d82f38 100644 --- a/searx/static/themes/simple/src/js/main/search.js +++ b/searx/static/themes/simple/src/js/main/search.js @@ -164,6 +164,7 @@ searxng.on(d.getElementById('safesearch'), 'change', submitIfQuery); searxng.on(d.getElementById('time_range'), 'change', submitIfQuery); searxng.on(d.getElementById('language'), 'change', submitIfQuery); + searxng.on(d.getElementById('license_filter'), 'change', submitIfQuery); } // most common browsers at the time of writing this support :has, 
except for Firefox diff --git a/searx/templates/simple/elements/apis.html b/searx/templates/simple/elements/apis.html index 5c7a07110..21b35e5f7 100644 --- a/searx/templates/simple/elements/apis.html +++ b/searx/templates/simple/elements/apis.html @@ -12,6 +12,7 @@ + {%- if timeout_limit -%} diff --git a/searx/templates/simple/filters/license.html b/searx/templates/simple/filters/license.html new file mode 100644 index 000000000..0d61006ac --- /dev/null +++ b/searx/templates/simple/filters/license.html @@ -0,0 +1,14 @@ + diff --git a/searx/templates/simple/results.html b/searx/templates/simple/results.html index fbc41a17a..927259053 100644 --- a/searx/templates/simple/results.html +++ b/searx/templates/simple/results.html @@ -82,6 +82,7 @@ + {% if timeout_limit %}{% endif %} @@ -118,6 +119,7 @@ + {% if timeout_limit %}{% endif %} @@ -136,6 +138,7 @@ + {% if timeout_limit %}{% endif %} @@ -161,6 +164,7 @@ + {% if timeout_limit %}{% endif %} diff --git a/searx/templates/simple/search.html b/searx/templates/simple/search.html index 360873c76..7c558e721 100644 --- a/searx/templates/simple/search.html +++ b/searx/templates/simple/search.html @@ -17,7 +17,10 @@
{% include 'simple/filters/languages.html' %} {% include 'simple/filters/time_range.html' %} {% include 'simple/filters/safesearch.html' %} + {% if 'images' in selected_categories %} + {% include 'simple/filters/license.html' %} + {% endif %}
{% if timeout_limit %}{% endif %} diff --git a/searx/webadapter.py b/searx/webadapter.py index 53d9bfa89..8e19cc725 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -102,6 +102,15 @@ def parse_time_range(form: Dict[str, str]) -> Optional[str]: return query_time_range +def parse_license_filter(form: Dict[str, str]) -> Optional[str]: + license_filter = form.get('license_filter') + + if license_filter in ('public', 'freetouse', 'commercial'): + return license_filter + + return None + + def parse_timeout(form: Dict[str, str], raw_text_query: RawTextQuery) -> Optional[float]: timeout_limit = raw_text_query.timeout_limit if timeout_limit is None: @@ -258,6 +267,7 @@ def get_search_query_from_webapp( query_pageno = parse_pageno(form) query_safesearch = parse_safesearch(preferences, form) query_time_range = parse_time_range(form) + query_license = parse_license_filter(form) query_timeout = parse_timeout(form, raw_text_query) external_bang = raw_text_query.external_bang redirect_to_first_result = raw_text_query.redirect_to_first_result @@ -292,6 +302,7 @@ def get_search_query_from_webapp( query_safesearch, query_pageno, query_time_range, + query_license, query_timeout, external_bang=external_bang, engine_data=engine_data, diff --git a/searx/webapp.py b/searx/webapp.py index 19c477794..02c94a24c 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -781,6 +781,7 @@ def search(): selected_categories = search_query.categories, pageno = search_query.pageno, time_range = search_query.time_range or '', + license_filter = search_query.license_filter or '', number_of_results = format_decimal(result_container.number_of_results), suggestions = suggestion_urls, answers = result_container.answers, diff --git a/searxng_extra/standalone_searx.py b/searxng_extra/standalone_searx.py index cf053d7ee..ac16585c1 100755 --- a/searxng_extra/standalone_searx.py +++ b/searxng_extra/standalone_searx.py @@ -67,6 +67,7 @@ def get_search_query( "pageno": str(args.pageno), "language": 
args.lang, "time_range": args.timerange, + 'license_filter': args.license_filter, } preferences = searx.preferences.Preferences(['simple'], engine_categories, searx.engines.engines, []) preferences.key_value_settings['safesearch'].parse(args.safesearch) @@ -106,7 +107,8 @@ def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]: "pageno": search_query.pageno, "lang": search_query.lang, "safesearch": search_query.safesearch, - "timerange": search_query.time_range, + "time_range": search_query.time_range, + "license": search_query.license_filter, }, "results": no_parsed_url(result_container.get_ordered_results()), "infoboxes": result_container.infoboxes, @@ -160,6 +162,13 @@ def parse_argument( parser.add_argument( '--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'], help='Filter by time range' ) + parser.add_argument( + '--license_filter', + type=str, + nargs='?', + choices=['any', 'public', 'freetouse', 'commercial'], + help='Filter by license', + ) return parser.parse_args(args) diff --git a/tests/unit/processors/test_online.py b/tests/unit/processors/test_online.py index 10e0deb97..1e755a924 100644 --- a/tests/unit/processors/test_online.py +++ b/tests/unit/processors/test_online.py @@ -36,7 +36,9 @@ class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docst def test_get_params_default_params(self): engine = engines.engines[TEST_ENGINE_NAME] online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME) - search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None) + search_query = SearchQuery( + 'test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None, None + ) params = self._get_params(online_processor, search_query, 'general') self.assertIn('method', params) self.assertIn('headers', params) @@ -48,6 +50,8 @@ class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docst def test_get_params_useragent(self): engine =
engines.engines[TEST_ENGINE_NAME] online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME) - search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None) + search_query = SearchQuery( + 'test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None, None + ) params = self._get_params(online_processor, search_query, 'general') self.assertIn('User-Agent', params['headers']) diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py index be95fb08e..23e163de9 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -29,7 +29,7 @@ class SearchQueryTestCase(SearxTestCase): # pylint: disable=missing-class-docst def test_repr(self): s = SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g') self.assertEqual( - repr(s), "SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g', None)" + repr(s), "SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g', None, None)" ) # noqa def test_eq(self): @@ -73,7 +73,7 @@ class SearchTestCase(SearxTestCase): # pylint: disable=missing-class-docstring def test_timeout_query_above_default_nomax(self): settings['outgoing']['max_request_timeout'] = None search_query = SearchQuery( - 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0 + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None, 5.0 ) search = searx.search.Search(search_query) with self.app.test_request_context('/search'): @@ -83,7 +83,7 @@ class SearchTestCase(SearxTestCase): # pylint: disable=missing-class-docstring def test_timeout_query_below_default_nomax(self): settings['outgoing']['max_request_timeout'] = None search_query = SearchQuery( - 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 1.0 + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None, 1.0 ) 
search = searx.search.Search(search_query) with self.app.test_request_context('/search'): @@ -93,7 +93,7 @@ class SearchTestCase(SearxTestCase): # pylint: disable=missing-class-docstring def test_timeout_query_below_max(self): settings['outgoing']['max_request_timeout'] = 10.0 search_query = SearchQuery( - 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0 + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None, 5.0 ) search = searx.search.Search(search_query) with self.app.test_request_context('/search'): @@ -103,7 +103,7 @@ class SearchTestCase(SearxTestCase): # pylint: disable=missing-class-docstring def test_timeout_query_above_max(self): settings['outgoing']['max_request_timeout'] = 10.0 search_query = SearchQuery( - 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 15.0 + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None, 15.0 ) search = searx.search.Search(search_query) with self.app.test_request_context('/search'):