Merge d5b0fb3d03 into 10d3af84b8

[fix] engine: duckduckgo - don't quote query string
The query string sent to DDG must not be quoted. The query string was URL-quoted in #4011, but the URL-quoted query string results in unexpected *URL decoded* and other garbage results as reported in #4019 and #4020. To test, compare the results of queries like:: !ddg Häuser und Straßen :de !ddg Häuser und Straßen :all !ddg 房屋和街道 :all !ddg 房屋和街道 :zh Closed: - [#4019] https://github.com/searxng/searxng/issues/4019 - [#4020] https://github.com/searxng/searxng/issues/4020 Related: - [#4011] https://github.com/searxng/searxng/pull/4011 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-19 11:39:18 +01:00 · 2024-11-17 18:14:22 +01:00 · 2024-11-09 17:46:49 +01:00
3 changed files with 81 additions and 3 deletions
--- a/searx/engines/adobe_stock.py
+++ b/searx/engines/adobe_stock.py
@ -0,0 +1,67 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Adobe Stock (images)
 """
 from urllib.parse import urlencode
 from searx.utils import gen_useragent
 # Engine metadata: the site is queried without an official API or API key
 # and the reply is parsed as JSON (see response()).
 about = {
    "website": 'https://stock.adobe.com/',
    "wikidata_id": 'Q5977430',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
 }
 # Category this engine's results belong to.
 categories = ['images']
 # request() forwards params['pageno'] to the site as 'search_page'.
 paging = True
 # Prefix of the search URL built in request().
 base_url = 'https://stock.adobe.com'
 # Sent as the 'limit' argument of the search request.
 results_per_page = 10
 # Sent as the 'order' argument of the search request.
 adobe_order = "relevance"  # one of 'relevance', 'featured', 'creation' or 'nb_downloads'
 def request(query, params):
    """Prepare the Adobe Stock search request for ``query``.

    Fills ``params['url']`` with the Ajax search URL (search term, page
    size, sort order and page number; video and audio content filters set
    to 0) and replaces ``params['headers']`` with the headers sent along
    with the request.  Returns the modified ``params``.
    """
    # urlencode() accepts a sequence of pairs and keeps their order
    query_pairs = (
        ('k', query),
        ('limit', results_per_page),
        ('order', adobe_order),
        ('search_page', params['pageno']),
        ('search_type', 'pagination'),
        ('filters[content_type:video]', 0),
        ('filters[content_type:audio]', 0),
    )
    query_string = urlencode(query_pairs)
    params['url'] = f"{base_url}/de/Ajax/Search?{query_string}"

    # headers required to bypass bot-detection
    http_headers = {
        "User-Agent": gen_useragent(),
        "Accept-Language": "en-US,en;q=0.5",
    }
    params['headers'] = http_headers
    return params
 def response(resp):
    """Turn the JSON reply of the search request into image results.

    ``resp.json()`` must contain an ``items`` mapping; every value of that
    mapping becomes one result dict rendered with the ``images.html``
    template.  Returns the list of result dicts (empty when ``items`` is
    empty).
    """

    def as_image_result(entry):
        # one result per item, 'resolution' is formatted as "<width>x<height>"
        return {
            'template': 'images.html',
            'url': entry['content_url'],
            'title': entry['title'],
            'content': '',
            'img_src': entry['content_thumb_extra_large_url'],
            'thumbnail_src': entry['thumbnail_url'],
            'resolution': f"{entry['content_original_width']}x{entry['content_original_height']}",
            'img_format': entry['format'],
            'author': entry['author'],
        }

    payload = resp.json()
    return [as_image_result(entry) for entry in payload['items'].values()]
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@ -6,7 +6,7 @@ DuckDuckGo Lite
 from typing import TYPE_CHECKING
 import re
-from urllib.parse import urlencode, quote_plus
+from urllib.parse import urlencode
 import json
 import babel
 import lxml.html
@ -263,7 +263,7 @@ def request(query, params):
    params['url'] = url
    params['method'] = 'POST'
-    params['data']['q'] = quote_plus(query)
+    params['data']['q'] = query
    # The API is not documented, so we do some reverse engineering and emulate
    # what https://html.duckduckgo.com/html does when you press "next Page" link
@ -381,7 +381,11 @@ def response(resp):
    zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
    zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()
-    if zero_click and "Your IP address is" not in zero_click and "Your user agent:" not in zero_click:
+    if zero_click and (
        "Your IP address is" not in zero_click
        and "Your user agent:" not in zero_click
        and "URL Decoded:" not in zero_click
    ):
        current_query = resp.search_params["data"].get("q")
        results.append(
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -325,6 +325,13 @@ engines:
    shortcut: 9g
    disabled: true
  - name: adobe stock
    engine: adobe_stock
    # available search orders: 'relevance', 'featured', 'creation', 'nb_downloads'
    # adobe_order: relevance
    shortcut: as
    disabled: true
  - name: alpine linux packages
    engine: alpinelinux
    disabled: true