Compare commits

...

3 Commits

Author SHA1 Message Date
Bnyro 51e8c61ca5
Merge 7edd75ff93 into 10d3af84b8 2024-11-20 08:21:28 +08:00
Markus Heiser 10d3af84b8 [fix] engine: duckduckgo - don't quote query string
The query string sent to DDG must not be quoted.

The query string was URL-quoted in #4011, but the URL-quoted query string results
in unexpected *URL decoded* and other garbage results as reported in #4019
and #4020.  To test, compare the results of queries like::

    !ddg Häuser und Straßen :de
    !ddg Häuser und Straßen :all
    !ddg 房屋和街道 :all
    !ddg 房屋和街道 :zh

Closed:

- [#4019] https://github.com/searxng/searxng/issues/4019
- [#4020] https://github.com/searxng/searxng/issues/4020

Related:

- [#4011] https://github.com/searxng/searxng/pull/4011

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-17 18:14:22 +01:00
Bnyro 7edd75ff93 [refactor] translation engines: common interface 2024-10-17 17:17:40 +02:00
9 changed files with 132 additions and 81 deletions

View File

@ -1,8 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Deepl translation engine"""
from json import loads
about = {
"website": 'https://deepl.com',
"wikidata_id": 'Q43968444',
@ -41,16 +39,14 @@ def request(_query, params):
def response(resp):
results = []
result = loads(resp.text)
translations = result['translations']
infobox = "<dl>"
result = resp.json()
for translation in translations:
infobox += f"<dd>{translation['text']}</dd>"
if not result.get('translations'):
return results
infobox += "</dl>"
translations = [{'text': translation['text']} for translation in result['translations']]
results.append({'answer': infobox})
results.append({'answer': translations[0]['text'], 'answer_type': 'translations', 'translations': translations})
return results

View File

@ -3,7 +3,6 @@
Dictzone
"""
from urllib.parse import urljoin
from lxml import html
from searx.utils import eval_xpath
@ -33,11 +32,10 @@ def request(query, params): # pylint: disable=unused-argument
def response(resp):
results = []
dom = html.fromstring(resp.text)
for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
translations = []
for result in eval_xpath(dom, results_xpath)[1:]:
try:
from_result, to_results_raw = eval_xpath(result, './td')
except: # pylint: disable=bare-except
@ -49,12 +47,17 @@ def response(resp):
if t.strip():
to_results.append(to_result.text_content())
results.append(
translations.append(
{
'url': urljoin(str(resp.url), '?%d' % k),
'title': from_result.text_content(),
'content': '; '.join(to_results),
'text': f"{from_result.text_content()} - {'; '.join(to_results)}",
}
)
return results
if translations:
result = {
'answer': translations[0]['text'],
'translations': translations,
'answer_type': 'translations',
}
return [result]

View File

@ -6,7 +6,7 @@ DuckDuckGo Lite
from typing import TYPE_CHECKING
import re
from urllib.parse import urlencode, quote_plus
from urllib.parse import urlencode
import json
import babel
import lxml.html
@ -263,7 +263,7 @@ def request(query, params):
params['url'] = url
params['method'] = 'POST'
params['data']['q'] = quote_plus(query)
params['data']['q'] = query
# The API is not documented, so we do some reverse engineering and emulate
# what https://html.duckduckgo.com/html does when you press "next Page" link
@ -381,7 +381,11 @@ def response(resp):
zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()
if zero_click and "Your IP address is" not in zero_click and "Your user agent:" not in zero_click:
if zero_click and (
"Your IP address is" not in zero_click
and "Your user agent:" not in zero_click
and "URL Decoded:" not in zero_click
):
current_query = resp.search_params["data"].get("q")
results.append(

View File

@ -24,7 +24,7 @@ def request(_query, params):
request_url = random.choice(base_url) if isinstance(base_url, list) else base_url
params['url'] = f"{request_url}/translate"
args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query']}
args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query'], 'alternatives': 3}
if api_key:
args['api_key'] = api_key
params['data'] = dumps(args)
@ -42,12 +42,11 @@ def response(resp):
json_resp = resp.json()
text = json_resp.get('translatedText')
from_lang = resp.search_params["from_lang"][1]
to_lang = resp.search_params["to_lang"][1]
query = resp.search_params["query"]
req_url = resp.search_params["req_url"]
if not text:
return results
if text:
results.append({"answer": text, "url": f"{req_url}/?source={from_lang}&target={to_lang}&q={query}"})
translations = [{'text': text}] + [{'text': alternative} for alternative in json_resp.get('alternatives', [])]
results.append({'answer': text, 'answer_type': 'translations', 'translations': translations})
return results

View File

@ -1,8 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Lingva (alternative Google Translate frontend)"""
from json import loads
about = {
"website": 'https://lingva.ml',
"wikidata_id": None,
@ -29,7 +27,7 @@ def request(_query, params):
def response(resp):
results = []
result = loads(resp.text)
result = resp.json()
info = result["info"]
from_to_prefix = "%s-%s " % (resp.search_params['from_lang'][1], resp.search_params['to_lang'][1])
@ -38,27 +36,40 @@ def response(resp):
if 'definitions' in info: # pylint: disable=too-many-nested-blocks
for definition in info['definitions']:
if 'list' in definition:
for item in definition['list']:
if 'synonyms' in item:
for synonym in item['synonyms']:
results.append({"suggestion": from_to_prefix + synonym})
for item in definition.get('list', []):
for synonym in item.get('synonyms', []):
results.append({"suggestion": from_to_prefix + synonym})
infobox = ""
data = []
for definition in info['definitions']:
for translation in definition['list']:
data.append(
{
'text': result['translation'],
'definitions': [translation['definition']] if translation['definition'] else [],
'examples': [translation['example']] if translation['example'] else [],
'synonyms': translation['synonyms'],
}
)
for translation in info["extraTranslations"]:
for word in translation["list"]:
infobox += f"<dl><dt>{word['word']}</dt>"
data.append(
{
'text': word['word'],
'definitions': word['meanings'],
}
)
for meaning in word["meanings"]:
infobox += f"<dd>{meaning}</dd>"
infobox += "</dl>"
if not data and result['translation']:
data.append({'text': result['translation']})
results.append(
{
'infobox': result["translation"],
'content': infobox,
'answer': data[0]['text'],
'answer_type': 'translations',
'translations': data,
}
)

View File

@ -4,7 +4,6 @@
import random
import re
from urllib.parse import urlencode
from flask_babel import gettext
about = {
"website": 'https://codeberg.org/aryak/mozhi',
@ -35,30 +34,27 @@ def request(_query, params):
def response(resp):
translation = resp.json()
infobox = ""
data = {'text': translation['translated-text'], 'definitions': [], 'examples': []}
if translation['target_transliteration'] and not re.match(
re_transliteration_unsupported, translation['target_transliteration']
):
infobox = f"<b>{translation['target_transliteration']}</b>"
data['transliteration'] = translation['target_transliteration']
if translation['word_choices']:
for word in translation['word_choices']:
infobox += f"<dl><dt>{word['word']}: {word['definition']}</dt>"
if word.get('definition'):
data['definitions'].append(word['definition'])
if word['examples_target']:
for example in word['examples_target']:
infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
for example in word.get('examples_target', []):
data['examples'].append(re.sub(r"<|>", "", example).lstrip('- '))
infobox += "</dl>"
if translation['source_synonyms']:
infobox += f"<dl><dt>{gettext('Synonyms')}: {', '.join(translation['source_synonyms'])}</dt></dl>"
data['synonyms'] = translation.get('source_synonyms', [])
result = {
'infobox': translation['translated-text'],
'content': infobox,
'answer': translation['translated-text'],
'answer_type': 'translations',
'translations': [data],
}
return [result]

View File

@ -35,18 +35,16 @@ def request(query, params): # pylint: disable=unused-argument
def response(resp):
results = []
results.append(
{
'url': web_url.format(
from_lang=resp.search_params['from_lang'][2],
to_lang=resp.search_params['to_lang'][2],
query=resp.search_params['query'],
),
'title': '[{0}-{1}] {2}'.format(
resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query']
),
'content': resp.json()['responseData']['translatedText'],
}
)
return results
json_resp = resp.json()
text = json_resp['responseData']['translatedText']
alternatives = [match['translation'] for match in json_resp['matches'] if match['translation'] != text]
translations = [{'text': translation} for translation in [text] + alternatives]
result = {
'answer': translations[0]['text'],
'answer_type': 'translations',
'translations': translations,
}
return [result]

View File

@ -0,0 +1,38 @@
{# Renders every entry of `translations`: the translated text as a heading,
   followed by the optional transliteration and the optional lists of
   definitions, examples and synonyms.  Entries after the first one are
   separated by a horizontal rule. #}
<div class="answer-translations">
  {% for translation in translations %}
    {% if loop.index > 1 %}
      <hr />
    {% endif %}
    <h3>{{ translation.text }}</h3>
    {% if translation.transliteration %}
      {# Must be an expression; without the braces the attribute name itself is printed. #}
      <b>{{ translation.transliteration }}</b>
    {% endif %}
    {% if translation.definitions %}
      <dl>
        <dt>{{ _('Definitions') }}</dt>
        <ul>
          {% for definition in translation.definitions %}
            <li>{{ definition }}</li>
          {% endfor %}
        </ul>
      </dl>
    {% endif %}
    {% if translation.examples %}
      <dl>
        <dt>{{ _('Examples') }}</dt>
        <ul>
          {% for example in translation.examples %}
            <li>{{ example }}</li>
          {% endfor %}
        </ul>
      </dl>
    {% endif %}
    {% if translation.synonyms %}
      <dl>
        <dt>{{ _('Synonyms') }}</dt>
        <ul>
          {% for synonym in translation.synonyms %}
            <li>{{ synonym }}</li>
          {% endfor %}
        </ul>
      </dl>
    {% endif %}
  {% endfor %}
</div>

View File

@ -23,14 +23,20 @@
<div id="answers" role="complementary" aria-labelledby="answers-title"><h4 class="title" id="answers-title">{{ _('Answers') }} : </h4>
{%- for answer in answers.values() -%}
<div class="answer">
<span>{{ answer.answer }}</span>
{%- if answer.url -%}
<a href="{{ answer.url }}" class="answer-url"
{%- if results_on_new_tab %} target="_blank" rel="noopener noreferrer"
{%- else -%} rel="noreferrer"
{%- endif -%}
>{{ urlparse(answer.url).hostname }}</a>
{% endif -%}
{%- if answer.answer_type == 'translations' -%}
{% with translations=answer.translations %}
{% include 'simple/answerers/translate.html' %}
{% endwith %}
{%- else -%}
<span>{{ answer.answer }}</span>
{%- if answer.url -%}
<a href="{{ answer.url }}" class="answer-url"
{%- if results_on_new_tab %} target="_blank" rel="noopener noreferrer"
{%- else -%} rel="noreferrer"
{%- endif -%}
>{{ urlparse(answer.url).hostname }}</a>
{% endif -%}
{%- endif -%}
</div>
{%- endfor -%}
</div>