[feat] add spellcheck functionality to searxng, closes #3816

This commit is contained in:
sepehrrasooli 2024-09-20 06:31:28 +03:30 committed by Markus Heiser
parent b183e620d8
commit c5de9784ea
5 changed files with 61 additions and 0 deletions

View File

@ -19,3 +19,4 @@ tomli==2.0.2; python_version < '3.11'
msgspec==0.18.6 msgspec==0.18.6
eval_type_backport; python_version < '3.9' eval_type_backport; python_version < '3.9'
typer-slim==0.12.5 typer-slim==0.12.5
pyspellchecker>=0.8.1

View File

@ -179,6 +179,9 @@
{% if 'safesearch' not in locked_preferences %} {% if 'safesearch' not in locked_preferences %}
{%- include 'simple/preferences/safesearch.html' -%} {%- include 'simple/preferences/safesearch.html' -%}
{%- endif -%} {%- endif -%}
{% if 'spellcheck' not in locked_preferences %}
{%- include 'simple/preferences/spellcheck.html' -%}
{%- endif -%}
{{- plugin_preferences('general') -}} {{- plugin_preferences('general') -}}
{%- if 'doi_resolver' not in locked_preferences %} {%- if 'doi_resolver' not in locked_preferences %}
{%- include 'simple/preferences/doi_resolver.html' -%} {%- include 'simple/preferences/doi_resolver.html' -%}

View File

@ -0,0 +1,18 @@
<fieldset>{{- '' -}}
<legend id="pref_spellcheck">{{- _('Spell Check') -}}</legend>{{- '' -}}
<div class="value">{{- '' -}}
<select name='spellcheck' aria-labelledby="pref_spellcheck">{{- '' -}}
<option value="1"
{%- if spellcheck == '1' %} selected="selected" {%- endif -%}>
{{- _('On') -}}
</option>{{- '' -}}
<option value="0"
{%- if spellcheck == '0' %} selected="selected" {%- endif -%}>
{{- _('Off') -}}
</option>{{- '' -}}
</select>{{- '' -}}
</div>{{- '' -}}
<div class="description">
{{- _('Spell check search queries') -}}
</div>{{- '' -}}
</fieldset>{{- '' -}}

View File

@ -92,6 +92,25 @@
</div> </div>
{% endif %} {% endif %}
{% if has_misspelled_words %}
{#- "Did you mean" form: re-submits the search with the spell-corrected query
    while carrying over the categories, language, time range, safesearch and
    theme of the current request. -#}
<form method="{{ method or 'POST' }}" action="{{ url_for('search') }}">
  <input type="hidden" name="q" value="{{ spellcheck_query }}" >
  {% for category in selected_categories %}
  <input type="hidden" name="category_{{ category }}" value="1" >
  {% endfor %}
  {#- The corrected query is a new search, so always start at its first page
      (not at the page following the one currently displayed). -#}
  <input type="hidden" name="pageno" value="1" >
  <input type="hidden" name="language" value="{{ current_language }}" >
  <input type="hidden" name="time_range" value="{{ time_range }}" >
  <input type="hidden" name="safesearch" value="{{ safesearch }}" >
  <input type="hidden" name="theme" value="{{ theme }}" >
  {% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
  {{- engine_data_form(engine_data) -}}
  <span style="color: #ff7769; font-size: 18px"> Did you mean:
    <input style="background: none; border: none; color: #1a0dab; cursor: pointer; font-size: 20px" type="submit" role="link" value="{{ spellcheck_query }}">
  </span>
</form>
{% endif %}
<div id="urls" role="main"> <div id="urls" role="main">
{% for result in results %} {% for result in results %}
{% if result.open_group and not only_template %}<div class="template_group_{{ result['template']|replace('.html', '') }}">{% endif %} {% if result.open_group and not only_template %}<div class="template_group_{{ result['template']|replace('.html', '') }}">{% endif %}

View File

@ -21,6 +21,7 @@ from typing import List, Dict, Iterable
import urllib import urllib
import urllib.parse import urllib.parse
from urllib.parse import urlencode, urlparse, unquote from urllib.parse import urlencode, urlparse, unquote
from spellchecker import SpellChecker
import httpx import httpx
@ -256,6 +257,17 @@ def code_highlighter(codelines, language=None):
return html_code return html_code
def spellcheck(query: str, user_language: str):
    """Build a spell-corrected variant of a search query.

    The query is split on whitespace and every word is passed to a
    ``SpellChecker`` created for ``user_language``.

    :param query: raw query string as typed by the user.
    :param user_language: language code handed to ``SpellChecker(language=...)``.
    :returns: a tuple ``(corrected_query, has_misspelled_words)``;
        ``corrected_query`` is the words re-joined with single spaces and
        ``has_misspelled_words`` is ``True`` when the checker reports at
        least one unknown word.
    """
    spell = SpellChecker(language=user_language)

    # str.split() without argument splits on any whitespace run and never
    # yields empty strings, so no additional filtering is needed.
    words = query.split()
    has_misspelled_words = bool(spell.unknown(words))

    corrected_words = []
    for word in words:
        # Look up the correction only once per word; the lookup is not cheap.
        suggestion = spell.correction(word)
        # No candidate found: keep the user's word instead of dropping it
        # from the suggested query.
        corrected_words.append(word if suggestion is None else suggestion)

    return ' '.join(corrected_words), has_misspelled_words
def get_result_template(theme_name: str, template_name: str): def get_result_template(theme_name: str, template_name: str):
themed_path = theme_name + '/result_templates/' + template_name themed_path = theme_name + '/result_templates/' + template_name
if themed_path in result_templates: if themed_path in result_templates:
@ -764,6 +776,11 @@ def search():
result_container.corrections, result_container.corrections,
) )
) )
spellcheck_query, has_misspelled_words = '', False
if request.preferences.get_value("spellcheck") == "1" and search.search_query.lang[0:2] in SpellChecker.languages():
spellcheck_query, has_misspelled_words = spellcheck(
request.form['q'], user_language=search.search_query.lang[0:2]
)
# engine_timings: get engine response times sorted from slowest to fastest # engine_timings: get engine response times sorted from slowest to fastest
engine_timings = sorted(result_container.get_timings(), reverse=True, key=lambda e: e.total) engine_timings = sorted(result_container.get_timings(), reverse=True, key=lambda e: e.total)
@ -778,6 +795,8 @@ def search():
'results.html', 'results.html',
results = results, results = results,
q=request.form['q'], q=request.form['q'],
spellcheck_query = spellcheck_query,
has_misspelled_words = has_misspelled_words,
selected_categories = search_query.categories, selected_categories = search_query.categories,
pageno = search_query.pageno, pageno = search_query.pageno,
time_range = search_query.time_range or '', time_range = search_query.time_range or '',
@ -1014,6 +1033,7 @@ def preferences():
current_locale = request.preferences.get_value("locale"), current_locale = request.preferences.get_value("locale"),
image_proxy = image_proxy, image_proxy = image_proxy,
engines_by_category = engines_by_category, engines_by_category = engines_by_category,
spellcheck = request.preferences.get_value("spellcheck"),
stats = stats, stats = stats,
max_rate95 = max_rate95, max_rate95 = max_rate95,
reliabilities = reliabilities, reliabilities = reliabilities,