Merge 7edd75ff93 into 10d3af84b8

[fix] engine: duckduckgo - don't quote query string
The query string sent to DDG must not be quoted. The query string was URL-quoted in #4011, but the URL-quoted query string results in unexpected *URL decoded* and other garbage results as reported in #4019 and #4020. To test compare the results of a query like:: !ddg Häuser und Straßen :de !ddg Häuser und Straßen :all !ddg 房屋和街道 :all !ddg 房屋和街道 :zh Closed: - [#4019] https://github.com/searxng/searxng/issues/4019 - [#4020] https://github.com/searxng/searxng/issues/4020 Related: - [#4011] https://github.com/searxng/searxng/pull/4011 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-20 08:21:28 +08:00 · 2024-11-17 18:14:22 +01:00 · 2024-10-17 17:17:40 +02:00
9 changed files with 132 additions and 81 deletions
--- a/searx/engines/deepl.py
+++ b/searx/engines/deepl.py
@ -1,8 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Deepl translation engine"""

-from json import loads
-
 about = {
    "website": 'https://deepl.com',
    "wikidata_id": 'Q43968444',
@ -41,16 +39,14 @@ def request(_query, params):

 def response(resp):
    results = []
-    result = loads(resp.text)
-    translations = result['translations']

-    infobox = "<dl>"
+    result = resp.json()

-    for translation in translations:
-        infobox += f"<dd>{translation['text']}</dd>"
+    if not result.get('translations'):
+        return results

-    infobox += "</dl>"
+    translations = [{'text': translation['text']} for translation in result['translations']]

-    results.append({'answer': infobox})
+    results.append({'answer': translations[0]['text'], 'answer_type': 'translations', 'translations': translations})

    return results
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@ -3,7 +3,6 @@
 Dictzone
 """

-from urllib.parse import urljoin
 from lxml import html
 from searx.utils import eval_xpath

@ -33,11 +32,10 @@ def request(query, params):  # pylint: disable=unused-argument


 def response(resp):
-    results = []
-
    dom = html.fromstring(resp.text)

-    for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
+    translations = []
+    for result in eval_xpath(dom, results_xpath)[1:]:
        try:
            from_result, to_results_raw = eval_xpath(result, './td')
        except:  # pylint: disable=bare-except
@ -49,12 +47,17 @@ def response(resp):
            if t.strip():
                to_results.append(to_result.text_content())

-        results.append(
+        translations.append(
            {
-                'url': urljoin(str(resp.url), '?%d' % k),
-                'title': from_result.text_content(),
-                'content': '; '.join(to_results),
+                'text': f"{from_result.text_content()} - {'; '.join(to_results)}",
            }
        )

-    return results
+    if not translations:
+        return []  # nothing parsed: 'result' would be unbound or the leftover loop variable
+    result = {
+        'answer': translations[0]['text'],
+        'translations': translations,
+        'answer_type': 'translations',
+    }
+    return [result]
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@ -6,7 +6,7 @@ DuckDuckGo Lite

 from typing import TYPE_CHECKING
 import re
-from urllib.parse import urlencode, quote_plus
+from urllib.parse import urlencode
 import json
 import babel
 import lxml.html
@ -263,7 +263,7 @@ def request(query, params):

    params['url'] = url
    params['method'] = 'POST'
-    params['data']['q'] = quote_plus(query)
+    params['data']['q'] = query

    # The API is not documented, so we do some reverse engineering and emulate
    # what https://html.duckduckgo.com/html does when you press "next Page" link
@ -381,7 +381,11 @@ def response(resp):
    zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
    zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()

-    if zero_click and "Your IP address is" not in zero_click and "Your user agent:" not in zero_click:
+    if zero_click and (
+        "Your IP address is" not in zero_click
+        and "Your user agent:" not in zero_click
+        and "URL Decoded:" not in zero_click
+    ):
        current_query = resp.search_params["data"].get("q")

        results.append(
--- a/searx/engines/libretranslate.py
+++ b/searx/engines/libretranslate.py
@ -24,7 +24,7 @@ def request(_query, params):
    request_url = random.choice(base_url) if isinstance(base_url, list) else base_url
    params['url'] = f"{request_url}/translate"

-    args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query']}
+    args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query'], 'alternatives': 3}
    if api_key:
        args['api_key'] = api_key
    params['data'] = dumps(args)
@ -42,12 +42,11 @@ def response(resp):
    json_resp = resp.json()
    text = json_resp.get('translatedText')

-    from_lang = resp.search_params["from_lang"][1]
-    to_lang = resp.search_params["to_lang"][1]
-    query = resp.search_params["query"]
-    req_url = resp.search_params["req_url"]
+    if not text:
+        return results

-    if text:
-        results.append({"answer": text, "url": f"{req_url}/?source={from_lang}&target={to_lang}&q={query}"})
+    translations = [{'text': text}] + [{'text': alternative} for alternative in json_resp.get('alternatives', [])]
+
+    results.append({'answer': text, 'answer_type': 'translations', 'translations': translations})

    return results
--- a/searx/engines/lingva.py
+++ b/searx/engines/lingva.py
@ -1,8 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Lingva (alternative Google Translate frontend)"""

-from json import loads
-
 about = {
    "website": 'https://lingva.ml',
    "wikidata_id": None,
@ -29,7 +27,7 @@ def request(_query, params):
 def response(resp):
    results = []

-    result = loads(resp.text)
+    result = resp.json()
    info = result["info"]
    from_to_prefix = "%s-%s " % (resp.search_params['from_lang'][1], resp.search_params['to_lang'][1])

@ -38,27 +36,40 @@ def response(resp):

    if 'definitions' in info:  # pylint: disable=too-many-nested-blocks
        for definition in info['definitions']:
-            if 'list' in definition:
-                for item in definition['list']:
-                    if 'synonyms' in item:
-                        for synonym in item['synonyms']:
-                            results.append({"suggestion": from_to_prefix + synonym})
+            for item in definition.get('list', []):
+                for synonym in item.get('synonyms', []):
+                    results.append({"suggestion": from_to_prefix + synonym})

-    infobox = ""
+    data = []
+
+    for definition in info.get('definitions', []):  # optional key, same as the guarded synonym loop above
+        for translation in definition.get('list', []):
+            data.append(
+                {
+                    'text': result['translation'],
+                    'definitions': [translation['definition']] if translation.get('definition') else [],
+                    'examples': [translation['example']] if translation.get('example') else [],
+                    'synonyms': translation.get('synonyms', []),
+                }
+            )

    for translation in info["extraTranslations"]:
        for word in translation["list"]:
-            infobox += f"<dl><dt>{word['word']}</dt>"
+            data.append(
+                {
+                    'text': word['word'],
+                    'definitions': word['meanings'],
+                }
+            )

-            for meaning in word["meanings"]:
-                infobox += f"<dd>{meaning}</dd>"
-
-            infobox += "</dl>"
+    if not data and result['translation']:
+        data.append({'text': result['translation']})

    results.append(
        {
-            'infobox': result["translation"],
-            'content': infobox,
+            'answer': data[0]['text'],
+            'answer_type': 'translations',
+            'translations': data,
        }
    )

--- a/searx/engines/mozhi.py
+++ b/searx/engines/mozhi.py
@ -4,7 +4,6 @@
 import random
 import re
 from urllib.parse import urlencode
-from flask_babel import gettext

 about = {
    "website": 'https://codeberg.org/aryak/mozhi',
@ -35,30 +34,27 @@ def request(_query, params):
 def response(resp):
    translation = resp.json()

-    infobox = ""
+    data = {'text': translation['translated-text'], 'definitions': [], 'examples': []}

    if translation['target_transliteration'] and not re.match(
        re_transliteration_unsupported, translation['target_transliteration']
    ):
-        infobox = f"<b>{translation['target_transliteration']}</b>"
+        data['transliteration'] = translation['target_transliteration']

    if translation['word_choices']:
        for word in translation['word_choices']:
-            infobox += f"<dl><dt>{word['word']}: {word['definition']}</dt>"
+            if word.get('definition'):
+                data['definitions'].append(word['definition'])

-            if word['examples_target']:
-                for example in word['examples_target']:
-                    infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
-                    infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
+            for example in word.get('examples_target', []):
+                data['examples'].append(re.sub(r"<|>", "", example).lstrip('- '))

-            infobox += "</dl>"
-
-    if translation['source_synonyms']:
-        infobox += f"<dl><dt>{gettext('Synonyms')}: {', '.join(translation['source_synonyms'])}</dt></dl>"
+    data['synonyms'] = translation.get('source_synonyms', [])

    result = {
-        'infobox': translation['translated-text'],
-        'content': infobox,
+        'answer': translation['translated-text'],
+        'answer_type': 'translations',
+        'translations': [data],
    }

    return [result]
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@ -35,18 +35,16 @@ def request(query, params):  # pylint: disable=unused-argument


 def response(resp):
-    results = []
-    results.append(
-        {
-            'url': web_url.format(
-                from_lang=resp.search_params['from_lang'][2],
-                to_lang=resp.search_params['to_lang'][2],
-                query=resp.search_params['query'],
-            ),
-            'title': '[{0}-{1}] {2}'.format(
-                resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query']
-            ),
-            'content': resp.json()['responseData']['translatedText'],
-        }
-    )
-    return results
+    json_resp = resp.json()
+    text = json_resp['responseData']['translatedText']
+
+    alternatives = [match['translation'] for match in json_resp['matches'] if match['translation'] != text]
+    translations = [{'text': translation} for translation in [text] + alternatives]
+
+    result = {
+        'answer': translations[0]['text'],
+        'answer_type': 'translations',
+        'translations': translations,
+    }
+
+    return [result]
--- a/searx/templates/simple/answerers/translate.html
+++ b/searx/templates/simple/answerers/translate.html
@ -0,0 +1,38 @@
+<div class="answer-translations">{# one section per entry of `translations`; transliteration, definitions, examples and synonyms are optional #}
+{% for translation in translations %}
+  {% if loop.index > 1 %}
+  <hr />
+  {% endif %}
+  <h3>{{ translation.text }}</h3>
+  {% if translation.transliteration %}
+  <b>{{ translation.transliteration }}</b>
+  {% endif %} {% if translation.definitions %}
+  <dl>
+    <dt>{{ _('Definitions') }}</dt>
+    <ul>
+    {% for definition in translation.definitions %}
+    <li>{{ definition }}</li>
+    {% endfor %}
+    </ul>
+  </dl>
+  {% endif %} {% if translation.examples %}
+  <dl>
+    <dt>{{ _('Examples') }}</dt>
+    <ul>
+    {% for example in translation.examples %}
+    <li>{{ example }}</li>
+    {% endfor %}
+    </ul>
+  </dl>
+  {% endif %} {% if translation.synonyms %}
+  <dl>
+    <dt>{{ _('Synonyms') }}</dt>
+    <ul>
+    {% for synonym in translation.synonyms %}
+    <li>{{ synonym }}</li>
+    {% endfor %}
+    </ul>
+  </dl>
+  {% endif %}
+{% endfor %}
+</div>
--- a/searx/templates/simple/results.html
+++ b/searx/templates/simple/results.html
@ -23,14 +23,20 @@
    <div id="answers" role="complementary" aria-labelledby="answers-title"><h4 class="title" id="answers-title">{{ _('Answers') }} : </h4>
        {%- for answer in answers.values() -%}
        <div class="answer">
-        <span>{{ answer.answer }}</span>
-          {%- if answer.url -%}
-          <a href="{{ answer.url }}" class="answer-url"
-             {%- if results_on_new_tab %} target="_blank" rel="noopener noreferrer"
-             {%- else -%} rel="noreferrer"
-             {%- endif -%}
-             >{{ urlparse(answer.url).hostname }}</a>
-          {% endif -%}
+          {%- if answer.answer_type == 'translations' -%}
+            {% with translations=answer.translations %}
+              {% include 'simple/answerers/translate.html' %}
+            {% endwith %}
+          {%- else -%}
+            <span>{{ answer.answer }}</span>
+            {%- if answer.url -%}
+              <a href="{{ answer.url }}" class="answer-url"
+                 {%- if results_on_new_tab %} target="_blank" rel="noopener noreferrer"
+                 {%- else -%} rel="noreferrer"
+                 {%- endif -%}
+                 >{{ urlparse(answer.url).hostname }}</a>
+            {% endif -%}
+          {%- endif -%}
        </div>
        {%- endfor -%}
    </div>