[feat] search: support for filtering licenses

This commit is contained in:
Bnyro 2024-10-15 11:06:37 +02:00
parent 3e87354f0e
commit a665e2d19c
15 changed files with 72 additions and 10 deletions

View File

@ -31,6 +31,7 @@ ENGINE_DEFAULT_ARGS = {
"engine_type": "online",
"paging": False,
"time_range_support": False,
"license_filter_support": False,
"safesearch": False,
# settings.yml
"categories": ["general"],

View File

@ -131,6 +131,7 @@ def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing.
'pageno': search_query.pageno,
'safesearch': search_query.safesearch,
'time_range': search_query.time_range,
'license_filter': search_query.license_filter,
}

View File

@ -35,6 +35,7 @@ class SearchQuery:
'safesearch',
'pageno',
'time_range',
'license_filter',
'timeout_limit',
'external_bang',
'engine_data',
@ -49,6 +50,7 @@ class SearchQuery:
safesearch: int = 0,
pageno: int = 1,
time_range: typing.Optional[str] = None,
license_filter: typing.Optional[str] = None,
timeout_limit: typing.Optional[float] = None,
external_bang: typing.Optional[str] = None,
engine_data: typing.Optional[typing.Dict[str, str]] = None,
@ -60,6 +62,7 @@ class SearchQuery:
self.safesearch = safesearch
self.pageno = pageno
self.time_range = time_range
self.license_filter = license_filter
self.timeout_limit = timeout_limit
self.external_bang = external_bang
self.engine_data = engine_data or {}
@ -77,13 +80,14 @@ class SearchQuery:
return list(set(map(lambda engineref: engineref.category, self.engineref_list)))
def __repr__(self):
return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format(
return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format(
self.query,
self.engineref_list,
self.lang,
self.safesearch,
self.pageno,
self.time_range,
self.license_filter,
self.timeout_limit,
self.external_bang,
self.redirect_to_first_result,
@ -97,6 +101,7 @@ class SearchQuery:
and self.safesearch == other.safesearch
and self.pageno == other.pageno
and self.time_range == other.time_range
and self.license_filter == other.license_filter
and self.timeout_limit == other.timeout_limit
and self.external_bang == other.external_bang
and self.redirect_to_first_result == other.redirect_to_first_result
@ -111,6 +116,7 @@ class SearchQuery:
self.safesearch,
self.pageno,
self.time_range,
self.license_filter,
self.timeout_limit,
self.external_bang,
self.redirect_to_first_result,
@ -125,6 +131,7 @@ class SearchQuery:
self.safesearch,
self.pageno,
self.time_range,
self.license_filter,
self.timeout_limit,
self.external_bang,
self.engine_data,

View File

@ -157,11 +157,15 @@ class EngineProcessor(ABC):
if search_query.time_range and not self.engine.time_range_support:
return None
if search_query.license_filter and not self.engine.license_filter_support:
return None
params = {}
params['category'] = engine_category
params['pageno'] = search_query.pageno
params['safesearch'] = search_query.safesearch
params['time_range'] = search_query.time_range
params['license_filter'] = search_query.license_filter
params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
params['searxng_locale'] = search_query.lang

View File

@ -685,6 +685,7 @@ engines:
- name: duckduckgo images
engine: duckduckgo_extra
categories: [images, web]
license_filter_support: true
ddg_category: images
shortcut: ddi
disabled: true

View File

@ -164,6 +164,7 @@
searxng.on(d.getElementById('safesearch'), 'change', submitIfQuery);
searxng.on(d.getElementById('time_range'), 'change', submitIfQuery);
searxng.on(d.getElementById('language'), 'change', submitIfQuery);
searxng.on(d.getElementById('license_filter'), 'change', submitIfQuery);
}
// most common browsers at the time of writing this support :has, except for Firefox

View File

@ -12,6 +12,7 @@
<input type="hidden" name="pageno" value="{{ pageno }}">
<input type="hidden" name="language" value="{{ current_language }}">
<input type="hidden" name="time_range" value="{{ time_range }}">
<input type="hidden" name="license_filter" value="{{ license_filter }}">
<input type="hidden" name="safesearch" value="{{ safesearch }}">
<input type="hidden" name="format" value="{{ output_type }}">
{%- if timeout_limit -%}

View File

@ -0,0 +1,14 @@
{#- License filter drop-down for the search filter bar.
    The current selection is read from the `license_filter` template variable,
    the same name used by the hidden `license_filter` form inputs; an empty or
    missing value selects the first ("no filter") option.
    The `{{- '' -}}` markers strip whitespace between the rendered tags. -#}
<select name="license_filter" id="license_filter" class="license_filter" aria-label="{{ _('License') }}">{{- '' -}}
  <option id="license-any" value="" {{ "selected" if not license_filter else ""}}>
    {{- _('None') -}}
  </option>{{- '' -}}
  <option id="license-public" value="public" {{ "selected" if license_filter=="public" else ""}}>
    {{- _('Public domain') -}}
  </option>{{- '' -}}
  <option id="license-freetouse" value="freetouse" {{ "selected" if license_filter=="freetouse" else ""}}>
    {{- _('Free to use') -}}
  </option>{{- '' -}}
  <option id="license-commercial" value="commercial" {{ "selected" if license_filter=="commercial" else ""}}>
    {{- _('Commercial') -}}
  </option>{{- '' -}}
</select>

View File

@ -82,6 +82,7 @@
<input type="hidden" name="q" value="{{ correction.url }}">
<input type="hidden" name="language" value="{{ current_language }}">
<input type="hidden" name="time_range" value="{{ time_range }}">
<input type="hidden" name="license_filter" value="{{ license_filter }}">
<input type="hidden" name="safesearch" value="{{ safesearch }}">
<input type="hidden" name="theme" value="{{ theme }}">
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit }}" >{% endif %}
@ -118,6 +119,7 @@
<input type="hidden" name="pageno" value="{{ pageno-1 }}" >
<input type="hidden" name="language" value="{{ current_language }}" >
<input type="hidden" name="time_range" value="{{ time_range }}" >
<input type="hidden" name="license_filter" value="{{ license_filter }}">
<input type="hidden" name="safesearch" value="{{ safesearch }}" >
<input type="hidden" name="theme" value="{{ theme }}" >
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
@ -136,6 +138,7 @@
<input type="hidden" name="pageno" value="{{ pageno+1 }}" >
<input type="hidden" name="language" value="{{ current_language }}" >
<input type="hidden" name="time_range" value="{{ time_range }}" >
<input type="hidden" name="license_filter" value="{{ license_filter }}">
<input type="hidden" name="safesearch" value="{{ safesearch }}" >
<input type="hidden" name="theme" value="{{ theme }}" >
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
@ -161,6 +164,7 @@
<input type="hidden" name="pageno" value="{{ x }}" >
<input type="hidden" name="language" value="{{ current_language }}" >
<input type="hidden" name="time_range" value="{{ time_range }}" >
<input type="hidden" name="license_filter" value="{{ license_filter }}">
<input type="hidden" name="safesearch" value="{{ safesearch }}" >
<input type="hidden" name="theme" value="{{ theme }}" >
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}

View File

@ -17,7 +17,10 @@
<div class="search_filters">
{% include 'simple/filters/languages.html' %}
{% include 'simple/filters/time_range.html' %}
{% include 'simple/filters/safesearch.html' %}
{% if 'images' in selected_categories %}
{% include 'simple/filters/safesearch.html' %}
{% endif %}
{% include 'simple/filters/license.html' %}
</div>
<input type="hidden" name="theme" value="{{ theme }}" >
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}

View File

@ -102,6 +102,15 @@ def parse_time_range(form: Dict[str, str]) -> Optional[str]:
return query_time_range
def parse_license_filter(form: Dict[str, str]) -> Optional[str]:
    """Return the license filter requested in *form*, or ``None``.

    Only ``'public'``, ``'freetouse'`` and ``'commercial'`` are accepted.
    A missing ``license_filter`` key or any other value yields ``None``.
    """
    requested = form.get('license_filter')
    return requested if requested in ('public', 'freetouse', 'commercial') else None
def parse_timeout(form: Dict[str, str], raw_text_query: RawTextQuery) -> Optional[float]:
timeout_limit = raw_text_query.timeout_limit
if timeout_limit is None:
@ -258,6 +267,7 @@ def get_search_query_from_webapp(
query_pageno = parse_pageno(form)
query_safesearch = parse_safesearch(preferences, form)
query_time_range = parse_time_range(form)
query_license = parse_license_filter(form)
query_timeout = parse_timeout(form, raw_text_query)
external_bang = raw_text_query.external_bang
redirect_to_first_result = raw_text_query.redirect_to_first_result
@ -292,6 +302,7 @@ def get_search_query_from_webapp(
query_safesearch,
query_pageno,
query_time_range,
query_license,
query_timeout,
external_bang=external_bang,
engine_data=engine_data,

View File

@ -781,6 +781,7 @@ def search():
selected_categories = search_query.categories,
pageno = search_query.pageno,
time_range = search_query.time_range or '',
license_filter = search_query.license_filter or '',
number_of_results = format_decimal(result_container.number_of_results),
suggestions = suggestion_urls,
answers = result_container.answers,

View File

@ -67,6 +67,7 @@ def get_search_query(
"pageno": str(args.pageno),
"language": args.lang,
"time_range": args.timerange,
'license_filter': args.license_filter,
}
preferences = searx.preferences.Preferences(['simple'], engine_categories, searx.engines.engines, [])
preferences.key_value_settings['safesearch'].parse(args.safesearch)
@ -106,7 +107,8 @@ def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]:
"pageno": search_query.pageno,
"lang": search_query.lang,
"safesearch": search_query.safesearch,
"timerange": search_query.time_range,
"time_range": search_query.time_range,
"license": search_query.license,
},
"results": no_parsed_url(result_container.get_ordered_results()),
"infoboxes": result_container.infoboxes,
@ -160,6 +162,13 @@ def parse_argument(
parser.add_argument(
'--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'], help='Filter by time range'
)
parser.add_argument(
'--license_filter',
type=str,
nargs='?',
choices=['any', 'public', 'freetouse', 'commercial'],
help='Filter by license',
)
return parser.parse_args(args)

View File

@ -36,7 +36,9 @@ class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docst
def test_get_params_default_params(self):
engine = engines.engines[TEST_ENGINE_NAME]
online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME)
search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None)
search_query = SearchQuery(
'test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None, None
)
params = self._get_params(online_processor, search_query, 'general')
self.assertIn('method', params)
self.assertIn('headers', params)
@ -48,6 +50,8 @@ class TestOnlineProcessor(SearxTestCase): # pylint: disable=missing-class-docst
def test_get_params_useragent(self):
engine = engines.engines[TEST_ENGINE_NAME]
online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME)
search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None)
search_query = SearchQuery(
'test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None, None
)
params = self._get_params(online_processor, search_query, 'general')
self.assertIn('User-Agent', params['headers'])

View File

@ -29,7 +29,7 @@ class SearchQueryTestCase(SearxTestCase): # pylint: disable=missing-class-docst
def test_repr(self):
s = SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g')
self.assertEqual(
repr(s), "SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g', None)"
repr(s), "SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g', None, None)"
) # noqa
def test_eq(self):
@ -73,7 +73,7 @@ class SearchTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
def test_timeout_query_above_default_nomax(self):
settings['outgoing']['max_request_timeout'] = None
search_query = SearchQuery(
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None, 5.0
)
search = searx.search.Search(search_query)
with self.app.test_request_context('/search'):
@ -83,7 +83,7 @@ class SearchTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
def test_timeout_query_below_default_nomax(self):
settings['outgoing']['max_request_timeout'] = None
search_query = SearchQuery(
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 1.0
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None, 1.0
)
search = searx.search.Search(search_query)
with self.app.test_request_context('/search'):
@ -93,7 +93,7 @@ class SearchTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
def test_timeout_query_below_max(self):
settings['outgoing']['max_request_timeout'] = 10.0
search_query = SearchQuery(
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None, 5.0
)
search = searx.search.Search(search_query)
with self.app.test_request_context('/search'):
@ -103,7 +103,7 @@ class SearchTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
def test_timeout_query_above_max(self):
settings['outgoing']['max_request_timeout'] = 10.0
search_query = SearchQuery(
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 15.0
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None, 15.0
)
search = searx.search.Search(search_query)
with self.app.test_request_context('/search'):