Compare commits

...

3 Commits

Author SHA1 Message Date
Markus Heiser a4ebfbf654
Merge e28a69515c into 10d3af84b8 2024-11-18 09:11:31 +01:00
Markus Heiser 10d3af84b8 [fix] engine: duckduckgo - don't quote query string
The query string sent to DDG must not be quoted.

The query string was URL-quoted in #4011, but the URL-quoted query string results
in unexpected *URL decoded* and other garbage results as reported in #4019
and #4020.  To test, compare the results of a query like::

    !ddg Häuser und Straßen :de
    !ddg Häuser und Straßen :all
    !ddg 房屋和街道 :all
    !ddg 房屋和街道 :zh

Closed:

- [#4019] https://github.com/searxng/searxng/issues/4019
- [#4020] https://github.com/searxng/searxng/issues/4020

Related:

- [#4011] https://github.com/searxng/searxng/pull/4011

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-17 18:14:22 +01:00
Markus Heiser e28a69515c [mod] uWSGI config: configuring uwsgi for production
As stated in [1], [2] and other posts, the defaults of uWSGI are not suitable for a
production environment.  To give just one example, the workers run indefinitely
and the memory leaks accumulate.

- "Configuring uWSGI for Production: The defaults are all wrong" EuroPython 2019 [1]
- "Configuring uWSGI for Production Deployment" [2]
- "When Paul has tested some PR on his instance, we could clearly see a memory
  leak over a week: the memory never dropped to the initial value. Same for my
  instance using Docker." [3]

[1] https://av.tib.eu/media/44810
[2] https://www.bloomberg.com/company/stories/configuring-uwsgi-production-deployment/
[3] https://github.com/searxng/searxng/pull/3443#issuecomment-2094347004

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-06-23 12:05:15 +02:00
6 changed files with 72 additions and 8 deletions

View File

@ -1,3 +1,4 @@
# -*- mode: conf-unix; coding: utf-8 -*-
[uwsgi] [uwsgi]
# Who will run the code # Who will run the code
uid = searxng uid = searxng
@ -6,6 +7,13 @@ gid = searxng
# Number of workers (usually CPU count) # Number of workers (usually CPU count)
# default value: %k (= number of CPU core, see Dockerfile) # default value: %k (= number of CPU core, see Dockerfile)
workers = $(UWSGI_WORKERS) workers = $(UWSGI_WORKERS)
harakiri = 60
# max-requests = 1000 # Restart workers after this many requests
# max-worker-lifetime = 3600 # Restart workers after this many seconds
reload-on-rss = 4096 # Restart workers after this much resident memory
worker-reload-mercy = 60 # How long to wait before forcefully killing workers
die-on-term = true # Shutdown when receiving SIGTERM (default is respawn)
py-callos-afterfork = true # allow workers to trap signals
# Number of threads per worker # Number of threads per worker
# default value: 4 (see Dockerfile) # default value: 4 (see Dockerfile)
@ -17,6 +25,9 @@ chmod-socket = 666
# Plugin to use and interpreter config # Plugin to use and interpreter config
single-interpreter = true single-interpreter = true
master = true master = true
strict = true
vacuum = true # Delete sockets during shutdown
need-app = true
plugin = python3 plugin = python3
lazy-apps = true lazy-apps = true
enable-threads = true enable-threads = true
@ -33,7 +44,8 @@ auto-procname = true
# Disable request logging for privacy # Disable request logging for privacy
disable-logging = true disable-logging = true
log-5xx = true log-4xx = true # but log 4xx's anyway
log-5xx = true # and 5xx's
# Set the max size of a request (request-body excluded) # Set the max size of a request (request-body excluded)
buffer-size = 8192 buffer-size = 8192

View File

@ -6,7 +6,7 @@ DuckDuckGo Lite
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import re import re
from urllib.parse import urlencode, quote_plus from urllib.parse import urlencode
import json import json
import babel import babel
import lxml.html import lxml.html
@ -263,7 +263,7 @@ def request(query, params):
params['url'] = url params['url'] = url
params['method'] = 'POST' params['method'] = 'POST'
params['data']['q'] = quote_plus(query) params['data']['q'] = query
# The API is not documented, so we do some reverse engineering and emulate # The API is not documented, so we do some reverse engineering and emulate
# what https://html.duckduckgo.com/html does when you press "next Page" link # what https://html.duckduckgo.com/html does when you press "next Page" link
@ -381,7 +381,11 @@ def response(resp):
zero_click_info_xpath = '//div[@id="zero_click_abstract"]' zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip() zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()
if zero_click and "Your IP address is" not in zero_click and "Your user agent:" not in zero_click: if zero_click and (
"Your IP address is" not in zero_click
and "Your user agent:" not in zero_click
and "URL Decoded:" not in zero_click
):
current_query = resp.search_params["data"].get("q") current_query = resp.search_params["data"].get("q")
results.append( results.append(

View File

@ -1,4 +1,4 @@
# -*- mode: conf; coding: utf-8 -*- # -*- mode: conf-unix; coding: utf-8 -*-
[uwsgi] [uwsgi]
# uWSGI core # uWSGI core
@ -24,6 +24,8 @@ env = SEARXNG_SETTINGS_PATH=${SEARXNG_SETTINGS_PATH}
# disable logging for privacy # disable logging for privacy
logger = systemd logger = systemd
disable-logging = true disable-logging = true
log-4xx = true # but log 4xx's anyway
log-5xx = true # and 5xx's
# The right granted on the created socket # The right granted on the created socket
chmod-socket = 666 chmod-socket = 666
@ -33,6 +35,9 @@ single-interpreter = true
# enable master process # enable master process
master = true master = true
strict = true
vacuum = true # Delete sockets during shutdown
need-app = true
# load apps in each worker instead of the master # load apps in each worker instead of the master
lazy-apps = true lazy-apps = true
@ -50,6 +55,13 @@ enable-threads = true
# Number of workers (usually CPU count) # Number of workers (usually CPU count)
workers = ${UWSGI_WORKERS:-%k} workers = ${UWSGI_WORKERS:-%k}
threads = ${UWSGI_THREADS:-4} threads = ${UWSGI_THREADS:-4}
harakiri = 60
# max-requests = 1000 # Restart workers after this many requests
# max-worker-lifetime = 3600 # Restart workers after this many seconds
reload-on-rss = 4096 # Restart workers after this much resident memory
worker-reload-mercy = 60 # How long to wait before forcefully killing workers
die-on-term = true # Shutdown when receiving SIGTERM (default is respawn)
py-callos-afterfork = true # allow workers to trap signals
# plugin: python # plugin: python
# -------------- # --------------

View File

@ -1,4 +1,4 @@
# -*- mode: conf; coding: utf-8 -*- # -*- mode: conf-unix; coding: utf-8 -*-
[uwsgi] [uwsgi]
# uWSGI core # uWSGI core
@ -24,6 +24,8 @@ env = SEARXNG_SETTINGS_PATH=${SEARXNG_SETTINGS_PATH}
# disable logging for privacy # disable logging for privacy
logger = systemd logger = systemd
disable-logging = true disable-logging = true
log-4xx = true # but log 4xx's anyway
log-5xx = true # and 5xx's
# The right granted on the created socket # The right granted on the created socket
chmod-socket = 666 chmod-socket = 666
@ -33,6 +35,9 @@ single-interpreter = true
# enable master process # enable master process
master = true master = true
strict = true
vacuum = true # Delete sockets during shutdown
need-app = true
# load apps in each worker instead of the master # load apps in each worker instead of the master
lazy-apps = true lazy-apps = true
@ -50,6 +55,13 @@ enable-threads = true
# Number of workers (usually CPU count) # Number of workers (usually CPU count)
workers = ${UWSGI_WORKERS:-%k} workers = ${UWSGI_WORKERS:-%k}
threads = ${UWSGI_THREADS:-4} threads = ${UWSGI_THREADS:-4}
harakiri = 60
# max-requests = 1000 # Restart workers after this many requests
# max-worker-lifetime = 3600 # Restart workers after this many seconds
reload-on-rss = 4096 # Restart workers after this much resident memory
worker-reload-mercy = 60 # How long to wait before forcefully killing workers
die-on-term = true # Shutdown when receiving SIGTERM (default is respawn)
py-callos-afterfork = true # allow workers to trap signals
# plugin: python # plugin: python
# -------------- # --------------

View File

@ -1,4 +1,4 @@
# -*- mode: conf; coding: utf-8 -*- # -*- mode: conf-unix; coding: utf-8 -*-
[uwsgi] [uwsgi]
# uWSGI core # uWSGI core
@ -27,6 +27,8 @@ env = SEARXNG_SETTINGS_PATH=${SEARXNG_SETTINGS_PATH}
# disable logging for privacy # disable logging for privacy
disable-logging = true disable-logging = true
log-4xx = true # but log 4xx's anyway
log-5xx = true # and 5xx's
# The right granted on the created socket # The right granted on the created socket
chmod-socket = 666 chmod-socket = 666
@ -36,6 +38,9 @@ single-interpreter = true
# enable master process # enable master process
master = true master = true
strict = true
vacuum = true # Delete sockets during shutdown
need-app = true
# load apps in each worker instead of the master # load apps in each worker instead of the master
lazy-apps = true lazy-apps = true
@ -53,6 +58,13 @@ enable-threads = true
# Number of workers (usually CPU count) # Number of workers (usually CPU count)
workers = ${UWSGI_WORKERS:-%k} workers = ${UWSGI_WORKERS:-%k}
threads = ${UWSGI_THREADS:-4} threads = ${UWSGI_THREADS:-4}
harakiri = 60
# max-requests = 1000 # Restart workers after this many requests
# max-worker-lifetime = 3600 # Restart workers after this many seconds
reload-on-rss = 4096 # Restart workers after this much resident memory
worker-reload-mercy = 60 # How long to wait before forcefully killing workers
die-on-term = true # Shutdown when receiving SIGTERM (default is respawn)
py-callos-afterfork = true # allow workers to trap signals
# plugin: python # plugin: python
# -------------- # --------------

View File

@ -1,4 +1,4 @@
# -*- mode: conf; coding: utf-8 -*- # -*- mode: conf-unix; coding: utf-8 -*-
[uwsgi] [uwsgi]
# uWSGI core # uWSGI core
@ -27,6 +27,8 @@ env = SEARXNG_SETTINGS_PATH=${SEARXNG_SETTINGS_PATH}
# disable logging for privacy # disable logging for privacy
disable-logging = true disable-logging = true
log-4xx = true # but log 4xx's anyway
log-5xx = true # and 5xx's
# The right granted on the created socket # The right granted on the created socket
chmod-socket = 666 chmod-socket = 666
@ -36,6 +38,9 @@ single-interpreter = true
# enable master process # enable master process
master = true master = true
strict = true
vacuum = true # Delete sockets during shutdown
need-app = true
# load apps in each worker instead of the master # load apps in each worker instead of the master
lazy-apps = true lazy-apps = true
@ -53,6 +58,13 @@ enable-threads = true
# Number of workers (usually CPU count) # Number of workers (usually CPU count)
workers = ${UWSGI_WORKERS:-%k} workers = ${UWSGI_WORKERS:-%k}
threads = ${UWSGI_THREADS:-4} threads = ${UWSGI_THREADS:-4}
harakiri = 60
# max-requests = 1000 # Restart workers after this many requests
# max-worker-lifetime = 3600 # Restart workers after this many seconds
reload-on-rss = 4096 # Restart workers after this much resident memory
worker-reload-mercy = 60 # How long to wait before forcefully killing workers
die-on-term = true # Shutdown when receiving SIGTERM (default is respawn)
py-callos-afterfork = true # allow workers to trap signals
# plugin: python # plugin: python
# -------------- # --------------