[feat] add spellcheck functionality to searxng, closes #3816

This commit is contained in:
sepehrrasooli 2024-09-20 06:31:28 +03:30
parent d026486ce3
commit 839993d009
5 changed files with 61 additions and 0 deletions

View File

@ -16,3 +16,4 @@ redis==5.0.8
markdown-it-py==3.0.0
fasttext-predict==0.9.2.2
pytomlpp==1.0.13; python_version < '3.11'
pyspellchecker==0.8.1

View File

@ -176,6 +176,9 @@
{% if 'safesearch' not in locked_preferences %}
{%- include 'simple/preferences/safesearch.html' -%}
{%- endif -%}
{% if 'spellcheck' not in locked_preferences %}
{%- include 'simple/preferences/spellcheck.html' -%}
{%- endif -%}
{{- plugin_preferences('general') -}}
{%- if 'doi_resolver' not in locked_preferences %}
{%- include 'simple/preferences/doi_resolver.html' -%}

View File

@ -0,0 +1,18 @@
{#- Preference fieldset for the 'spellcheck' setting: renders a <select> named
    'spellcheck' with the options On (value "1") and Off (value "0"); the option
    whose value equals the `spellcheck` template variable is pre-selected. -#}
<fieldset>{{- '' -}}
<legend id="pref_spellcheck">{{- _('Spell Check') -}}</legend>{{- '' -}}
<div class="value">{{- '' -}}
<select name='spellcheck' aria-labelledby="pref_spellcheck">{{- '' -}}
<option value="1"
{%- if spellcheck == '1' %} selected="selected" {%- endif -%}>
{{- _('On') -}}
</option>{{- '' -}}
<option value="0"
{%- if spellcheck == '0' %} selected="selected" {%- endif -%}>
{{- _('Off') -}}
</option>{{- '' -}}
</select>{{- '' -}}
</div>{{- '' -}}
<div class="description">
{{- _('Spell check search queries') -}}
</div>{{- '' -}}
</fieldset>{{- '' -}}

View File

@ -92,6 +92,25 @@
</div>
{% endif %}
{% if has_misspelled_words %}
{# "Did you mean" suggestion: re-submits the search with the corrected query
   while carrying over the current categories, language, time range,
   safesearch, theme and engine data. #}
<form method="{{ method or 'POST' }}" action="{{ url_for('search') }}">
  <input type="hidden" name="q" value="{{ spellcheck_query }}" >
  {% for category in selected_categories %}
  <input type="hidden" name="category_{{ category }}" value="1" >
  {% endfor %}
  {# A corrected query is a new search, so it starts on the first page
     instead of continuing from the current page number. #}
  <input type="hidden" name="pageno" value="1" >
  <input type="hidden" name="language" value="{{ current_language }}" >
  <input type="hidden" name="time_range" value="{{ time_range }}" >
  <input type="hidden" name="safesearch" value="{{ safesearch }}" >
  <input type="hidden" name="theme" value="{{ theme }}" >
  {% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
  {{- engine_data_form(engine_data) -}}
  <span style="color: #ff7769; font-size: 18px"> {{ _('Did you mean:') }}
    <input style="background: none; border: none; color: #1a0dab; cursor: pointer; font-size: 20px" type="submit" role="link" value="{{ spellcheck_query }}">
  </span>
</form>
{% endif %}
<div id="urls" role="main">
{% for result in results %}
{% if result.open_group and not only_template %}<div class="template_group_{{ result['template']|replace('.html', '') }}">{% endif %}

View File

@ -21,6 +21,7 @@ from typing import List, Dict, Iterable
import urllib
import urllib.parse
from urllib.parse import urlencode, urlparse, unquote
from spellchecker import SpellChecker
import httpx
@ -254,6 +255,17 @@ def code_highlighter(codelines, language=None):
return html_code
def spellcheck(query: str, user_language: str):
    """Build a spell-corrected variant of a search query.

    The query is split on whitespace.  Every word that the dictionary for
    *user_language* does not know is replaced by the checker's suggested
    correction.  Words the dictionary knows, and unknown words for which the
    checker offers no correction, are kept exactly as the user typed them, so
    the suggestion never loses a search term.

    :param query: raw query string as entered by the user.
    :param user_language: language code handed to
        ``SpellChecker(language=...)``.
    :returns: a tuple ``(corrected_query, has_misspelled_words)``.  The flag
        is ``True`` only when at least one word was actually replaced, i.e.
        when ``corrected_query`` is a suggestion worth showing to the user.
    """
    spell = SpellChecker(language=user_language)

    # Cache per distinct word: computing a correction is the expensive part
    # and queries frequently repeat words.
    replacements = {}
    corrected_words = []
    has_misspelled_words = False

    for word in query.split():
        if word not in replacements:
            suggestion = None
            # Only unknown words are sent through correction(); known words
            # must not be rewritten.
            if spell.unknown([word]):
                suggestion = spell.correction(word)
            # No usable suggestion -> keep the user's own word.
            replacements[word] = suggestion or word
        replacement = replacements[word]
        if replacement != word:
            has_misspelled_words = True
        corrected_words.append(replacement)

    return ' '.join(corrected_words), has_misspelled_words
def get_result_template(theme_name: str, template_name: str):
themed_path = theme_name + '/result_templates/' + template_name
if themed_path in result_templates:
@ -760,6 +772,11 @@ def search():
result_container.corrections,
)
)
spellcheck_query, has_misspelled_words = '', False
if request.preferences.get_value("spellcheck") == "1" and search.search_query.lang[0:2] in SpellChecker.languages():
spellcheck_query, has_misspelled_words = spellcheck(
request.form['q'], user_language=search.search_query.lang[0:2]
)
# engine_timings: get engine response times sorted from slowest to fastest
engine_timings = sorted(result_container.get_timings(), reverse=True, key=lambda e: e.total)
@ -774,6 +791,8 @@ def search():
'results.html',
results = results,
q=request.form['q'],
spellcheck_query = spellcheck_query,
has_misspelled_words = has_misspelled_words,
selected_categories = search_query.categories,
pageno = search_query.pageno,
time_range = search_query.time_range or '',
@ -1010,6 +1029,7 @@ def preferences():
current_locale = request.preferences.get_value("locale"),
image_proxy = image_proxy,
engines_by_category = engines_by_category,
spellcheck = request.preferences.get_value("spellcheck"),
stats = stats,
max_rate95 = max_rate95,
reliabilities = reliabilities,