Compare commits

...

5 Commits

Author SHA1 Message Date
Sepehr Rasouli 288bb7e440
Merge c5de9784ea into b07c0ae39f 2024-11-02 00:11:22 +00:00
Bnyro b07c0ae39f [fix] annas archive: crash when no thumbnail, differing results, paging 2024-11-01 12:49:33 +01:00
Markus Heiser 56e3d72a76 [fix] CI: remove target test.coverage from python's test matrix
The test.coverage target causes a lot of failed CI jobs for reasons that cannot be
explained.  As we do not monitor the coverage anyway, it is superfluous to run
this job, especially as it only has a disruptive effect on the CI.

BTW, the CI action upload-artifact@v3 is deprecated [1]

[1] https://github.com/actions/upload-artifact?tab=readme-ov-file#actionsupload-artifact

Related: https://github.com/searxng/searxng/issues/3983
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-01 10:14:57 +01:00
searxng-bot cc148a76b0 [l10n] update translations from Weblate
a4cdaaa26 - 2024-10-30 - Juno Takano <jutty@users.noreply.translate.codeberg.org>
46bad3a79 - 2024-10-29 - saltsnorter <saltsnorter@users.noreply.translate.codeberg.org>
6a4096da9 - 2024-10-27 - Eryk Michalak <gnu.ewm@protonmail.com>
64815d956 - 2024-10-28 - ljansen <ljansen@users.noreply.translate.codeberg.org>
851ae554d - 2024-10-26 - return42 <return42@users.noreply.translate.codeberg.org>
24f16d5e3 - 2024-10-26 - return42 <return42@users.noreply.translate.codeberg.org>
8278d1cb9 - 2024-10-26 - Atul_Eterno <Atul_Eterno@users.noreply.translate.codeberg.org>
2024-11-01 08:30:38 +01:00
sepehrrasooli c5de9784ea [feat] add spellcheck functionality to searxng, closes #3816 2024-10-30 08:30:44 +01:00
21 changed files with 127 additions and 62 deletions

View File

@ -45,14 +45,6 @@ jobs:
make V=1 gecko.driver
- name: Run tests
run: make V=1 ci.test
- name: Test coverage
run: make V=1 test.coverage
- name: Store coverage result
uses: actions/upload-artifact@v3
with:
name: coverage-${{ matrix.python-version }}
path: coverage/
retention-days: 60
themes:
name: Themes

View File

@ -19,3 +19,4 @@ tomli==2.0.2; python_version < '3.11'
msgspec==0.18.6
eval_type_backport; python_version < '3.9'
typer-slim==0.12.5
pyspellchecker>=0.8.1

View File

@ -34,10 +34,10 @@ Implementations
"""
from typing import List, Dict, Any, Optional
from urllib.parse import quote
from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list
from searx.utils import extract_text, eval_xpath, eval_xpath_getindex, eval_xpath_list
from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS
@ -53,7 +53,7 @@ about: Dict[str, Any] = {
# engine dependent config
categories: List[str] = ["files"]
paging: bool = False
paging: bool = True
# search-url
base_url: str = "https://annas-archive.org"
@ -99,9 +99,18 @@ def init(engine_settings=None): # pylint: disable=unused-argument
def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
    """Build the search URL for the outgoing request.

    The query string is assembled from the engine settings (``aa_content``,
    ``aa_ext``, ``aa_sort``), the language resolved via ``traits`` and the
    requested page number, then stored in ``params["url"]``.

    :param query: search terms as entered by the user (unescaped).
    :param params: request parameters; ``params["language"]`` and
        ``params["pageno"]`` are read, ``params["url"]`` is written.
    :returns: the same ``params`` dict, with ``"url"`` set.
    """
    lang = traits.get_language(params["language"], traits.all_locale)  # type: ignore
    args = {
        'lang': lang,
        'content': aa_content,
        'ext': aa_ext,
        'sort': aa_sort,
        'q': query,
        'page': params['pageno'],
    }
    # Drop None and empty values so they do not show up as empty URL arguments.
    filtered_args = {k: v for k, v in args.items() if v}
    # urlencode() escapes every value, so the raw query is passed in as-is.
    params["url"] = f"{base_url}/search?{urlencode(filtered_args)}"
    return params
@ -128,12 +137,12 @@ def response(resp) -> List[Dict[str, Optional[str]]]:
def _get_result(item):
    """Convert one result node into a ``paper.html`` result dict.

    :param item: lxml element of a single search hit; its ``href`` attribute
        is appended to ``base_url`` to form the result URL.
    :returns: dict with the keys ``template``, ``url``, ``title``,
        ``publisher``, ``authors``, ``content`` and ``thumbnail``.
    """
    return {
        'template': 'paper.html',
        'url': base_url + extract_text(eval_xpath_getindex(item, './@href', 0)),
        'title': extract_text(eval_xpath(item, './/h3/text()[1]')),
        'publisher': extract_text(eval_xpath(item, './/div[contains(@class, "text-sm")]')),
        'authors': [extract_text(eval_xpath(item, './/div[contains(@class, "italic")]'))],
        'content': extract_text(eval_xpath(item, './/div[contains(@class, "text-xs")]')),
        # Not every hit has an <img>: indexing the raw xpath result with [0]
        # raises IndexError on an empty list, so a None default is used instead.
        'thumbnail': extract_text(eval_xpath_getindex(item, './/img/@src', 0, default=None), allow_none=True),
    }

View File

@ -179,6 +179,9 @@
{% if 'safesearch' not in locked_preferences %}
{%- include 'simple/preferences/safesearch.html' -%}
{%- endif -%}
{% if 'spellcheck' not in locked_preferences %}
{%- include 'simple/preferences/spellcheck.html' -%}
{%- endif -%}
{{- plugin_preferences('general') -}}
{%- if 'doi_resolver' not in locked_preferences %}
{%- include 'simple/preferences/doi_resolver.html' -%}

View File

@ -0,0 +1,18 @@
<fieldset>{{- '' -}}
<legend id="pref_spellcheck">{{- _('Spell Check') -}}</legend>{{- '' -}}
<div class="value">{{- '' -}}
<select name='spellcheck' aria-labelledby="pref_spellcheck">{{- '' -}}
<option value="1"
{%- if spellcheck == '1' %} selected="selected" {%- endif -%}>
{{- _('On') -}}
</option>{{- '' -}}
<option value="0"
{%- if spellcheck == '0' %} selected="selected" {%- endif -%}>
{{- _('Off') -}}
</option>{{- '' -}}
</select>{{- '' -}}
</div>{{- '' -}}
<div class="description">
{{- _('Spell check search queries') -}}
</div>{{- '' -}}
</fieldset>{{- '' -}}

View File

@ -92,6 +92,25 @@
</div>
{% endif %}
{% if has_misspelled_words %}
<form method="{{ method or 'POST' }}" action="{{ url_for('search') }}">
<input type="hidden" name="q" value="{{ spellcheck_query }}" >
{% for category in selected_categories %}
<input type="hidden" name="category_{{ category }}" value="1" >
{% endfor %}
<input type="hidden" name="pageno" value="{{ pageno+1 }}" >
<input type="hidden" name="language" value="{{ current_language }}" >
<input type="hidden" name="time_range" value="{{ time_range }}" >
<input type="hidden" name="safesearch" value="{{ safesearch }}" >
<input type="hidden" name="theme" value="{{ theme }}" >
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
{{- engine_data_form(engine_data) -}}
<span style="color: #ff7769; font-size: 18px"> Did you mean:
<input style="background: none; border: none; color: #1a0dab; cursor: pointer; font-size: 20px" type="submit" role="link" value="{{ spellcheck_query }}">
</span>
</form>
{% endif %}
<div id="urls" role="main">
{% for result in results %}
{% if result.open_group and not only_template %}<div class="template_group_{{ result['template']|replace('.html', '') }}">{% endif %}

View File

@ -39,7 +39,7 @@ msgstr ""
"Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-10-06 14:31+0000\n"
"PO-Revision-Date: 2024-10-26 21:13+0000\n"
"Last-Translator: Atul_Eterno <Atul_Eterno@users.noreply.translate.codeberg."
"org>\n"
"Language-Team: Spanish <https://translate.codeberg.org/projects/searxng/"
@ -49,7 +49,7 @@ msgstr ""
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: Weblate 5.7.2\n"
"X-Generator: Weblate 5.8.1\n"
"Generated-By: Babel 2.16.0\n"
#. CONSTANT_NAMES['NO_SUBGROUPING']
@ -155,7 +155,7 @@ msgstr "preguntas y respuestas"
#. CATEGORY_GROUPS['REPOS']
#: searx/searxng.msg
msgid "repos"
msgstr "repos"
msgstr "repositorios"
#. CATEGORY_GROUPS['SOFTWARE_WIKIS']
#: searx/searxng.msg

View File

@ -22,13 +22,14 @@
# MVDW-Java <MVDW-Java@users.noreply.translate.codeberg.org>, 2024.
# notlmutsaers <notlmutsaers@users.noreply.translate.codeberg.org>, 2024.
# return42 <return42@users.noreply.translate.codeberg.org>, 2024.
# ljansen <ljansen@users.noreply.translate.codeberg.org>, 2024.
msgid ""
msgstr ""
"Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-10-15 12:18+0000\n"
"Last-Translator: return42 <return42@users.noreply.translate.codeberg.org>\n"
"PO-Revision-Date: 2024-10-28 21:07+0000\n"
"Last-Translator: ljansen <ljansen@users.noreply.translate.codeberg.org>\n"
"Language-Team: Dutch <https://translate.codeberg.org/projects/searxng/"
"searxng/nl/>\n"
"Language: nl\n"
@ -36,7 +37,7 @@ msgstr ""
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: Weblate 5.7.2\n"
"X-Generator: Weblate 5.8.1\n"
"Generated-By: Babel 2.16.0\n"
#. CONSTANT_NAMES['NO_SUBGROUPING']
@ -464,7 +465,7 @@ msgstr "Bereken {functions} van de opties"
#: searx/engines/mozhi.py:57
msgid "Synonyms"
msgstr ""
msgstr "Synoniemen"
#: searx/engines/openstreetmap.py:159
msgid "Get directions"
@ -1234,12 +1235,13 @@ msgid "Max time"
msgstr "Max. duur"
#: searx/templates/simple/preferences/favicon.html:2
#, fuzzy
msgid "Favicon Resolver"
msgstr ""
msgstr "favicon-resolver"
#: searx/templates/simple/preferences/favicon.html:15
msgid "Display favicons near search results"
msgstr ""
msgstr "Toon favicons naast zoekresultaten"
#: searx/templates/simple/preferences/footer.html:2
msgid ""

View File

@ -23,8 +23,8 @@ msgstr ""
"Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-10-15 12:18+0000\n"
"Last-Translator: return42 <return42@users.noreply.translate.codeberg.org>\n"
"PO-Revision-Date: 2024-10-28 21:07+0000\n"
"Last-Translator: Eryk Michalak <gnu.ewm@protonmail.com>\n"
"Language-Team: Polish <https://translate.codeberg.org/projects/searxng/"
"searxng/pl/>\n"
"Language: pl\n"
@ -34,7 +34,7 @@ msgstr ""
"Plural-Forms: nplurals=4; plural=(n==1 ? 0 : (n%10>=2 && n%10<=4) && ("
"n%100<12 || n%100>14) ? 1 : n!=1 && (n%10>=0 && n%10<=1) || (n%10>=5 && "
"n%10<=9) || (n%100>=12 && n%100<=14) ? 2 : 3);\n"
"X-Generator: Weblate 5.7.2\n"
"X-Generator: Weblate 5.8.1\n"
"Generated-By: Babel 2.16.0\n"
#. CONSTANT_NAMES['NO_SUBGROUPING']
@ -1230,7 +1230,7 @@ msgstr "Maksymalny czas"
#: searx/templates/simple/preferences/favicon.html:2
msgid "Favicon Resolver"
msgstr ""
msgstr "Pobieranie favikony"
#: searx/templates/simple/preferences/favicon.html:15
msgid "Display favicons near search results"

View File

@ -19,13 +19,14 @@
# gvlx <gvlx@users.noreply.translate.codeberg.org>, 2024.
# ds451 <ds451@users.noreply.translate.codeberg.org>, 2024.
# Pedro_Tresp <Pedro_Tresp@users.noreply.translate.codeberg.org>, 2024.
# saltsnorter <saltsnorter@users.noreply.translate.codeberg.org>, 2024.
msgid ""
msgstr ""
"Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-10-13 23:26+0000\n"
"Last-Translator: Pedro_Tresp <Pedro_Tresp@users.noreply.translate.codeberg."
"PO-Revision-Date: 2024-10-29 05:54+0000\n"
"Last-Translator: saltsnorter <saltsnorter@users.noreply.translate.codeberg."
"org>\n"
"Language-Team: Portuguese <https://translate.codeberg.org/projects/searxng/"
"searxng/pt/>\n"
@ -34,7 +35,7 @@ msgstr ""
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
"X-Generator: Weblate 5.7.2\n"
"X-Generator: Weblate 5.8.1\n"
"Generated-By: Babel 2.16.0\n"
#. CONSTANT_NAMES['NO_SUBGROUPING']
@ -175,7 +176,7 @@ msgstr "escuro"
#. STYLE_NAMES['BLACK']
#: searx/searxng.msg
msgid "black"
msgstr ""
msgstr "preto"
#. BRAND_CUSTOM_LINKS['UPTIME']
#: searx/searxng.msg
@ -1228,11 +1229,11 @@ msgstr "Tempo máximo"
#: searx/templates/simple/preferences/favicon.html:2
msgid "Favicon Resolver"
msgstr ""
msgstr "Solucionador do Favicon"
#: searx/templates/simple/preferences/favicon.html:15
msgid "Display favicons near search results"
msgstr ""
msgstr "Mostrar os favicons junto aos resultados da pesquisa"
#: searx/templates/simple/preferences/footer.html:2
msgid ""

View File

@ -30,13 +30,14 @@
# Pyrbor <Pyrbor@users.noreply.translate.codeberg.org>, 2024.
# rodgui <rodgui@users.noreply.translate.codeberg.org>, 2024.
# rafablog77 <rafablog77@users.noreply.translate.codeberg.org>, 2024.
# Juno Takano <jutty@users.noreply.translate.codeberg.org>, 2024.
msgid ""
msgstr ""
"Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-10-15 12:18+0000\n"
"Last-Translator: return42 <return42@users.noreply.translate.codeberg.org>\n"
"PO-Revision-Date: 2024-10-31 12:16+0000\n"
"Last-Translator: Juno Takano <jutty@users.noreply.translate.codeberg.org>\n"
"Language-Team: Portuguese (Brazil) <https://translate.codeberg.org/projects/"
"searxng/searxng/pt_BR/>\n"
"Language: pt_BR\n"
@ -44,7 +45,7 @@ msgstr ""
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n > 1;\n"
"X-Generator: Weblate 5.7.2\n"
"X-Generator: Weblate 5.8.1\n"
"Generated-By: Babel 2.16.0\n"
#. CONSTANT_NAMES['NO_SUBGROUPING']
@ -185,7 +186,7 @@ msgstr "escuro"
#. STYLE_NAMES['BLACK']
#: searx/searxng.msg
msgid "black"
msgstr ""
msgstr "preto"
#. BRAND_CUSTOM_LINKS['UPTIME']
#: searx/searxng.msg
@ -472,7 +473,7 @@ msgstr "Computar {functions} dos argumentos"
#: searx/engines/mozhi.py:57
msgid "Synonyms"
msgstr ""
msgstr "Sinônimos"
#: searx/engines/openstreetmap.py:159
msgid "Get directions"
@ -1243,7 +1244,7 @@ msgstr "Tempo máximo"
#: searx/templates/simple/preferences/favicon.html:2
msgid "Favicon Resolver"
msgstr ""
msgstr "Resolvedor de Favicons"
#: searx/templates/simple/preferences/favicon.html:15
msgid "Display favicons near search results"

View File

@ -12,19 +12,19 @@
# tvminh19 <tvminh19@users.noreply.translate.codeberg.org>, 2024.
msgid ""
msgstr ""
"Project-Id-Version: searx\n"
"Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-08-07 01:02+0000\n"
"Last-Translator: tvminh19 <tvminh19@users.noreply.translate.codeberg.org>"
"\n"
"PO-Revision-Date: 2024-10-26 21:13+0000\n"
"Last-Translator: return42 <return42@users.noreply.translate.codeberg.org>\n"
"Language-Team: Vietnamese <https://translate.codeberg.org/projects/searxng/"
"searxng/vi/>\n"
"Language: vi\n"
"Language-Team: Vietnamese "
"<https://translate.codeberg.org/projects/searxng/searxng/vi/>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"X-Generator: Weblate 5.8.1\n"
"Generated-By: Babel 2.16.0\n"
#. CONSTANT_NAMES['NO_SUBGROUPING']
@ -186,7 +186,7 @@ msgstr "Nhiệt độ trung bình."
#. WEATHER_TERMS['CLOUD COVER']
#: searx/engines/open_meteo.py:91 searx/searxng.msg
msgid "Cloud cover"
msgstr ""
msgstr "Mây che phủ"
#. WEATHER_TERMS['CONDITION']
#: searx/engines/duckduckgo_weather.py:45 searx/engines/wttr.py:51
@ -283,7 +283,7 @@ msgstr ""
#: searx/engines/duckduckgo_weather.py:58 searx/engines/open_meteo.py:86
#: searx/engines/wttr.py:62 searx/searxng.msg
msgid "Wind"
msgstr ""
msgstr "Gió"
#. SOCIAL_MEDIA_TERMS['SUBSCRIBERS']
#: searx/engines/lemmy.py:85 searx/searxng.msg
@ -1990,4 +1990,3 @@ msgstr "ẩn phim"
#~ msgid "Engines cannot retrieve results"
#~ msgstr "Các trình tìm kiếm không nhận được kết quả"

View File

@ -28,18 +28,19 @@
# hugoalh <hugoalh@users.noreply.translate.codeberg.org>, 2024.
msgid ""
msgstr ""
"Project-Id-Version: searx\n"
"Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-08-12 04:00+0000\n"
"Last-Translator: hugoalh <hugoalh@users.noreply.translate.codeberg.org>\n"
"PO-Revision-Date: 2024-10-26 21:13+0000\n"
"Last-Translator: return42 <return42@users.noreply.translate.codeberg.org>\n"
"Language-Team: Chinese (Traditional Han script) <https://translate.codeberg."
"org/projects/searxng/searxng/zh_Hant/>\n"
"Language: zh_Hant_TW\n"
"Language-Team: Chinese (Traditional) "
"<https://translate.codeberg.org/projects/searxng/searxng/zh_Hant/>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"X-Generator: Weblate 5.8.1\n"
"Generated-By: Babel 2.16.0\n"
#. CONSTANT_NAMES['NO_SUBGROUPING']
@ -180,7 +181,7 @@ msgstr "黑暗"
#. STYLE_NAMES['BLACK']
#: searx/searxng.msg
msgid "black"
msgstr ""
msgstr "黑色"
#. BRAND_CUSTOM_LINKS['UPTIME']
#: searx/searxng.msg
@ -467,7 +468,7 @@ msgstr "計算 {functions} 參數"
#: searx/engines/mozhi.py:57
msgid "Synonyms"
msgstr ""
msgstr "同義詞"
#: searx/engines/openstreetmap.py:159
msgid "Get directions"
@ -942,7 +943,7 @@ msgstr "來自搜尋引擎的訊息"
#: searx/templates/simple/elements/engines_msg.html:7
msgid "seconds"
msgstr ""
msgstr ""
#: searx/templates/simple/elements/search_url.html:3
msgid "Search URL"
@ -1206,11 +1207,11 @@ msgstr "最大時間"
#: searx/templates/simple/preferences/favicon.html:2
msgid "Favicon Resolver"
msgstr ""
msgstr "網站圖標搜索器"
#: searx/templates/simple/preferences/favicon.html:15
msgid "Display favicons near search results"
msgstr ""
msgstr "在搜尋結果旁顯示網站圖標"
#: searx/templates/simple/preferences/footer.html:2
msgid ""
@ -1911,4 +1912,3 @@ msgstr "隱藏影片"
#~ msgid "Engines cannot retrieve results"
#~ msgstr "引擎無法擷取結果"

View File

@ -21,6 +21,7 @@ from typing import List, Dict, Iterable
import urllib
import urllib.parse
from urllib.parse import urlencode, urlparse, unquote
from spellchecker import SpellChecker
import httpx
@ -256,6 +257,17 @@ def code_highlighter(codelines, language=None):
return html_code
def spellcheck(query: str, user_language: str):
    """Build a spell-corrected suggestion for a search query.

    The query is split on whitespace and each word is replaced by the
    correction proposed by ``SpellChecker``.  A word for which no correction
    is available is kept unchanged, so the suggestion never loses terms.

    :param query: query string as entered by the user.
    :param user_language: language code passed to ``SpellChecker(language=...)``.
    :returns: tuple ``(corrected_query, has_misspelled_words)``; the flag is
        ``True`` when ``SpellChecker.unknown()`` reports at least one word.
    """
    spell = SpellChecker(language=user_language)
    words = query.split()
    has_misspelled_words = bool(spell.unknown(words))

    corrected_words = []
    for word in words:
        # Look the correction up once per word.
        suggestion = spell.correction(word)
        # correction() can yield None; fall back to what the user typed.
        corrected_words.append(word if suggestion is None else suggestion)

    return ' '.join(corrected_words), has_misspelled_words
def get_result_template(theme_name: str, template_name: str):
themed_path = theme_name + '/result_templates/' + template_name
if themed_path in result_templates:
@ -764,6 +776,11 @@ def search():
result_container.corrections,
)
)
spellcheck_query, has_misspelled_words = '', False
if request.preferences.get_value("spellcheck") == "1" and search.search_query.lang[0:2] in SpellChecker.languages():
spellcheck_query, has_misspelled_words = spellcheck(
request.form['q'], user_language=search.search_query.lang[0:2]
)
# engine_timings: get engine response times sorted from slowest to fastest
engine_timings = sorted(result_container.get_timings(), reverse=True, key=lambda e: e.total)
@ -778,6 +795,8 @@ def search():
'results.html',
results = results,
q=request.form['q'],
spellcheck_query = spellcheck_query,
has_misspelled_words = has_misspelled_words,
selected_categories = search_query.categories,
pageno = search_query.pageno,
time_range = search_query.time_range or '',
@ -1014,6 +1033,7 @@ def preferences():
current_locale = request.preferences.get_value("locale"),
image_proxy = image_proxy,
engines_by_category = engines_by_category,
spellcheck = request.preferences.get_value("spellcheck"),
stats = stats,
max_rate95 = max_rate95,
reliabilities = reliabilities,