Merge e28a69515c into 10d3af84b8

[fix] engine: duckduckgo - don't quote query string
The query string sent to DDG must not be quoted. The query string was URL-quoted in #4011, but the URL-quoted query string results in unexpected *URL decoded* and other garbage results as reported in #4019 and #4020. To test, compare the results of a query like:: !ddg Häuser und Straßen :de !ddg Häuser und Straßen :all !ddg 房屋和街道 :all !ddg 房屋和街道 :zh Closed: - [#4019] https://github.com/searxng/searxng/issues/4019 - [#4020] https://github.com/searxng/searxng/issues/4020 Related: - [#4011] https://github.com/searxng/searxng/pull/4011 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-18 09:11:31 +01:00 · 2024-11-17 18:14:22 +01:00 · 2024-06-23 12:05:15 +02:00
6 changed files with 72 additions and 8 deletions
--- a/dockerfiles/uwsgi.ini
+++ b/dockerfiles/uwsgi.ini
@ -1,3 +1,4 @@
+# -*- mode: conf-unix; coding: utf-8  -*-
 [uwsgi]
 # Who will run the code
 uid = searxng
@ -6,6 +7,13 @@ gid = searxng
 # Number of workers (usually CPU count)
 # default value: %k (= number of CPU core, see Dockerfile)
 workers = $(UWSGI_WORKERS)
+harakiri = 60
+# max-requests = 1000                  # Restart workers after this many requests
+# max-worker-lifetime = 3600           # Restart workers after this many seconds
+reload-on-rss = 4096                 # Restart workers after this much resident memory
+worker-reload-mercy = 60             # How long to wait before forcefully killing workers
+die-on-term = true                   # Shutdown when receiving SIGTERM (default is respawn)
+py-callos-afterfork = true           # allow workers to trap signals

 # Number of threads per worker
 # default value: 4 (see Dockerfile)
@ -17,6 +25,9 @@ chmod-socket = 666
 # Plugin to use and interpreter config
 single-interpreter = true
 master = true
+strict = true
+vacuum = true                        # Delete sockets during shutdown
+need-app = true
 plugin = python3
 lazy-apps = true
 enable-threads = true
@ -33,7 +44,8 @@ auto-procname = true

 # Disable request logging for privacy
 disable-logging = true
-log-5xx = true
+log-4xx = true                       # but log 4xx's anyway
+log-5xx = true                       # and 5xx's

 # Set the max size of a request (request-body excluded)
 buffer-size = 8192
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@ -6,7 +6,7 @@ DuckDuckGo Lite

 from typing import TYPE_CHECKING
 import re
-from urllib.parse import urlencode, quote_plus
+from urllib.parse import urlencode
 import json
 import babel
 import lxml.html
@ -263,7 +263,7 @@ def request(query, params):

    params['url'] = url
    params['method'] = 'POST'
-    params['data']['q'] = quote_plus(query)
+    params['data']['q'] = query

    # The API is not documented, so we do some reverse engineering and emulate
    # what https://html.duckduckgo.com/html does when you press "next Page" link
@ -381,7 +381,11 @@ def response(resp):
    zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
    zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()

-    if zero_click and "Your IP address is" not in zero_click and "Your user agent:" not in zero_click:
+    if zero_click and (
+        "Your IP address is" not in zero_click
+        and "Your user agent:" not in zero_click
+        and "URL Decoded:" not in zero_click
+    ):
        current_query = resp.search_params["data"].get("q")

        results.append(
--- a/utils/templates/etc/uwsgi/apps-archlinux/searxng.ini
+++ b/utils/templates/etc/uwsgi/apps-archlinux/searxng.ini
@ -1,4 +1,4 @@
-# -*- mode: conf; coding: utf-8  -*-
+# -*- mode: conf-unix; coding: utf-8  -*-
 [uwsgi]

 # uWSGI core
@ -24,6 +24,8 @@ env = SEARXNG_SETTINGS_PATH=${SEARXNG_SETTINGS_PATH}
 # disable logging for privacy
 logger = systemd
 disable-logging = true
+log-4xx = true                       # but log 4xx's anyway
+log-5xx = true                       # and 5xx's

 # The right granted on the created socket
 chmod-socket = 666
@ -33,6 +35,9 @@ single-interpreter = true

 # enable master process
 master = true
+strict = true
+vacuum = true                        # Delete sockets during shutdown
+need-app = true

 # load apps in each worker instead of the master
 lazy-apps = true
@ -50,6 +55,13 @@ enable-threads = true
 # Number of workers (usually CPU count)
 workers = ${UWSGI_WORKERS:-%k}
 threads = ${UWSGI_THREADS:-4}
+harakiri = 60
+# max-requests = 1000                  # Restart workers after this many requests
+# max-worker-lifetime = 3600           # Restart workers after this many seconds
+reload-on-rss = 4096                 # Restart workers after this much resident memory
+worker-reload-mercy = 60             # How long to wait before forcefully killing workers
+die-on-term = true                   # Shutdown when receiving SIGTERM (default is respawn)
+py-callos-afterfork = true           # allow workers to trap signals

 # plugin: python
 # --------------
--- a/utils/templates/etc/uwsgi/apps-archlinux/searxng.ini:socket
+++ b/utils/templates/etc/uwsgi/apps-archlinux/searxng.ini:socket
@ -1,4 +1,4 @@
-# -*- mode: conf; coding: utf-8  -*-
+# -*- mode: conf-unix; coding: utf-8  -*-
 [uwsgi]

 # uWSGI core
@ -24,6 +24,8 @@ env = SEARXNG_SETTINGS_PATH=${SEARXNG_SETTINGS_PATH}
 # disable logging for privacy
 logger = systemd
 disable-logging = true
+log-4xx = true                       # but log 4xx's anyway
+log-5xx = true                       # and 5xx's

 # The right granted on the created socket
 chmod-socket = 666
@ -33,6 +35,9 @@ single-interpreter = true

 # enable master process
 master = true
+strict = true
+vacuum = true                        # Delete sockets during shutdown
+need-app = true

 # load apps in each worker instead of the master
 lazy-apps = true
@ -50,6 +55,13 @@ enable-threads = true
 # Number of workers (usually CPU count)
 workers = ${UWSGI_WORKERS:-%k}
 threads = ${UWSGI_THREADS:-4}
+harakiri = 60
+# max-requests = 1000                  # Restart workers after this many requests
+# max-worker-lifetime = 3600           # Restart workers after this many seconds
+reload-on-rss = 4096                 # Restart workers after this much resident memory
+worker-reload-mercy = 60             # How long to wait before forcefully killing workers
+die-on-term = true                   # Shutdown when receiving SIGTERM (default is respawn)
+py-callos-afterfork = true           # allow workers to trap signals

 # plugin: python
 # --------------
--- a/utils/templates/etc/uwsgi/apps-available/searxng.ini
+++ b/utils/templates/etc/uwsgi/apps-available/searxng.ini
@ -1,4 +1,4 @@
-# -*- mode: conf; coding: utf-8  -*-
+# -*- mode: conf-unix; coding: utf-8  -*-
 [uwsgi]

 # uWSGI core
@ -27,6 +27,8 @@ env = SEARXNG_SETTINGS_PATH=${SEARXNG_SETTINGS_PATH}

 # disable logging for privacy
 disable-logging = true
+log-4xx = true                       # but log 4xx's anyway
+log-5xx = true                       # and 5xx's

 # The right granted on the created socket
 chmod-socket = 666
@ -36,6 +38,9 @@ single-interpreter = true

 # enable master process
 master = true
+strict = true
+vacuum = true                        # Delete sockets during shutdown
+need-app = true

 # load apps in each worker instead of the master
 lazy-apps = true
@ -53,6 +58,13 @@ enable-threads = true
 # Number of workers (usually CPU count)
 workers = ${UWSGI_WORKERS:-%k}
 threads = ${UWSGI_THREADS:-4}
+harakiri = 60
+# max-requests = 1000                  # Restart workers after this many requests
+# max-worker-lifetime = 3600           # Restart workers after this many seconds
+reload-on-rss = 4096                 # Restart workers after this much resident memory
+worker-reload-mercy = 60             # How long to wait before forcefully killing workers
+die-on-term = true                   # Shutdown when receiving SIGTERM (default is respawn)
+py-callos-afterfork = true           # allow workers to trap signals

 # plugin: python
 # --------------
--- a/utils/templates/etc/uwsgi/apps-available/searxng.ini:socket
+++ b/utils/templates/etc/uwsgi/apps-available/searxng.ini:socket
@ -1,4 +1,4 @@
-# -*- mode: conf; coding: utf-8  -*-
+# -*- mode: conf-unix; coding: utf-8  -*-
 [uwsgi]

 # uWSGI core
@ -27,6 +27,8 @@ env = SEARXNG_SETTINGS_PATH=${SEARXNG_SETTINGS_PATH}

 # disable logging for privacy
 disable-logging = true
+log-4xx = true                       # but log 4xx's anyway
+log-5xx = true                       # and 5xx's

 # The right granted on the created socket
 chmod-socket = 666
@ -36,6 +38,9 @@ single-interpreter = true

 # enable master process
 master = true
+strict = true
+vacuum = true                        # Delete sockets during shutdown
+need-app = true

 # load apps in each worker instead of the master
 lazy-apps = true
@ -53,6 +58,13 @@ enable-threads = true
 # Number of workers (usually CPU count)
 workers = ${UWSGI_WORKERS:-%k}
 threads = ${UWSGI_THREADS:-4}
+harakiri = 60
+# max-requests = 1000                  # Restart workers after this many requests
+# max-worker-lifetime = 3600           # Restart workers after this many seconds
+reload-on-rss = 4096                 # Restart workers after this much resident memory
+worker-reload-mercy = 60             # How long to wait before forcefully killing workers
+die-on-term = true                   # Shutdown when receiving SIGTERM (default is respawn)
+py-callos-afterfork = true           # allow workers to trap signals

 # plugin: python
 # --------------