Compare commits

..

2 Commits

Author          SHA1        Message                                          Date
Markus Heiser   90bfd8a275  [fix] issues reported by `make test.yamllint`    2024-03-09 15:25:28 +01:00
                            Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser   e1a4cf9488  [mod] lint github YAML config files              2024-03-09 15:23:28 +01:00
                            Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
543 changed files with 61506 additions and 114267 deletions
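The two commits listed above can be inspected locally in a SearXNG checkout — a minimal sketch; the base of the truncated ".." compare range is not shown on this page, so only the per-commit views are given:

    git show --stat 90bfd8a275   # [fix] issues reported by `make test.yamllint`
    git show --stat e1a4cf9488   # [mod] lint github YAML config files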

View File

@ -16,9 +16,6 @@ max_line_length = 119
[*.html]
indent_size = 4
[*.js]
indent_size = 2
[*.json]
indent_size = 4
insert_final_newline = ignore

View File

@ -1,5 +1,5 @@
blank_issues_enabled: true
contact_links:
- name: Questions & Answers (Q&A)
- name: "Questions & Answers (Q&A)"
url: https://github.com/searxng/searxng/discussions/categories/q-a
about: Ask questions and find answers

View File

@ -7,7 +7,7 @@ on: # yamllint disable-line rule:truthy
jobs:
updateData:
name: Update data - ${{ matrix.fetch }}
runs-on: ubuntu-24.04
runs-on: ubuntu-20.04
if: ${{ github.repository_owner == 'searxng'}}
strategy:
fail-fast: false
@ -29,9 +29,9 @@ jobs:
sudo ./utils/searxng.sh install packages
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v2
with:
python-version: '3.12'
python-version: '3.9'
architecture: 'x64'
- name: Install Python dependencies
@ -46,7 +46,7 @@ jobs:
- name: Create Pull Request
id: cpr
uses: peter-evans/create-pull-request@v6
uses: peter-evans/create-pull-request@v3
with:
commit-message: '[data] update searx.data - ${{ matrix.fetch }}'
committer: searxng-bot <noreply@github.com>

View File

@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-20.04]
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- name: Checkout
uses: actions/checkout@v4
@ -25,7 +25,7 @@ jobs:
sudo ./utils/searxng.sh install packages
sudo apt install firefox
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: 'x64'
@ -45,6 +45,14 @@ jobs:
make V=1 gecko.driver
- name: Run tests
run: make V=1 ci.test
- name: Test coverage
run: make V=1 test.coverage
- name: Store coverage result
uses: actions/upload-artifact@v3
with:
name: coverage-${{ matrix.python-version }}
path: coverage/
retention-days: 60
themes:
name: Themes
@ -55,9 +63,9 @@ jobs:
- name: Install Ubuntu packages
run: sudo ./utils/searxng.sh install buildhost
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: '3.12'
python-version: '3.9'
architecture: 'x64'
- name: Cache Python dependencies
id: cache-python
@ -67,7 +75,7 @@ jobs:
./local
./.nvm
./node_modules
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
- name: Install node dependencies
run: make V=1 node.env
- name: Build themes
@ -87,9 +95,9 @@ jobs:
- name: Install Ubuntu packages
run: sudo ./utils/searxng.sh install buildhost
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: '3.12'
python-version: '3.9'
architecture: 'x64'
- name: Cache Python dependencies
id: cache-python
@ -99,7 +107,7 @@ jobs:
./local
./.nvm
./node_modules
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
- name: Build documentation
run: |
make V=1 docs.clean docs.html
@ -131,9 +139,9 @@ jobs:
fetch-depth: '0'
token: ${{ secrets.WEBLATE_GITHUB_TOKEN }}
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: '3.12'
python-version: '3.9'
architecture: 'x64'
- name: Cache Python dependencies
id: cache-python
@ -143,7 +151,7 @@ jobs:
./local
./.nvm
./node_modules
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
- name: weblate & git setup
env:
WEBLATE_CONFIG: ${{ secrets.WEBLATE_CONFIG }}
@ -175,9 +183,9 @@ jobs:
# make sure "make docker.push" can get the git history
fetch-depth: '0'
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: '3.12'
python-version: '3.9'
architecture: 'x64'
- name: Cache Python dependencies
id: cache-python
@ -187,7 +195,7 @@ jobs:
./local
./.nvm
./node_modules
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
- name: Set up QEMU
if: env.DOCKERHUB_USERNAME != null
uses: docker/setup-qemu-action@v1

View File

@ -16,9 +16,9 @@ jobs:
fetch-depth: '0'
token: ${{ secrets.WEBLATE_GITHUB_TOKEN }}
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: '3.12'
python-version: '3.9'
architecture: 'x64'
- name: Cache Python dependencies
id: cache-python
@ -28,7 +28,7 @@ jobs:
./local
./.nvm
./node_modules
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
- name: weblate & git setup
env:
WEBLATE_CONFIG: ${{ secrets.WEBLATE_CONFIG }}

.gitignore (3 changes, vendored)
View File

@ -23,6 +23,3 @@ gh-pages/
.idea/
searx/version_frozen.py
.dir-locals.el
.python-version

View File

@ -1,4 +1,4 @@
# -*- coding: utf-8; mode: conf-unix -*-
# -*- coding: utf-8; mode: conf -*-
# lint Python modules using external checkers.
#
# This is the main checker controlling the other ones and the reports
@ -27,7 +27,7 @@ ignore-patterns=
#init-hook=
# Use multiple processes to speed up Pylint.
jobs=0
jobs=1
# List of plugins (as comma separated values of python modules names) to load,
# usually to register additional checkers.
@ -338,7 +338,6 @@ valid-metaclass-classmethod-first-arg=mcs
# Maximum number of arguments for function / method
max-args=8
max-positional-arguments=14
# Maximum number of attributes for a class (see R0902).
max-attributes=20

View File

@ -172,5 +172,3 @@ features or generally made searx better:
- Bernie Huang `<https://github.com/BernieHuang2008>`
- Austin Olacsi `<https://github.com/Austin-Olacsi>`
- @micsthepick
- Daniel Kukula `<https://github.com/dkuku>`
- Patrick Evans `https://github.com/holysoles`

View File

@ -1,4 +1,4 @@
FROM alpine:3.20
FROM alpine:3.19
ENTRYPOINT ["/sbin/tini","--","/usr/local/searxng/dockerfiles/docker-entrypoint.sh"]
EXPOSE 8080
VOLUME /etc/searxng
@ -35,6 +35,7 @@ RUN apk add --no-cache -t build-dependencies \
git \
&& apk add --no-cache \
ca-certificates \
su-exec \
python3 \
py3-pip \
libxml2 \

View File

@ -66,7 +66,7 @@ A user_, admin_ and developer_ handbook is available on the homepage_.
Contact
=======
Ask questions or chat with the SearXNG community (this not a chatbot) on
Ask questions or just chat about SearXNG on
IRC
`#searxng on libera.chat <https://web.libera.chat/?channel=#searxng>`_

View File

@ -175,4 +175,4 @@ unset MORTY_KEY
# Start uwsgi
printf 'Listen on %s\n' "${BIND_ADDRESS}"
exec uwsgi --master --uid searxng --gid searxng --http-socket "${BIND_ADDRESS}" "${UWSGI_SETTINGS_PATH}"
exec su-exec searxng:searxng uwsgi --master --http-socket "${BIND_ADDRESS}" "${UWSGI_SETTINGS_PATH}"

View File

@ -42,10 +42,6 @@ buffer-size = 8192
# See https://github.com/searx/searx-docker/issues/24
add-header = Connection: close
# Follow SIGTERM convention
# See https://github.com/searxng/searxng/issues/3427
die-on-term
# uwsgi serves the static files
static-map = /static=/usr/local/searxng/searx/static
# expires set to one day

View File

@ -84,9 +84,9 @@ HTML of the site. URL of the SearXNG instance and values are customizable.
.. code:: html
<form method="post" action="https://example.org/">
<!-- search --> <input type="text" name="q">
<!-- categories --> <input type="hidden" name="categories" value="general,social media">
<!-- language --> <input type="hidden" name="lang" value="all">
<!-- locale --> <input type="hidden" name="locale" value="en">
<!-- date filter --> <input type="hidden" name="time_range" value="month">
<!-- search --> <input type="text" name="q" />
<!-- categories --> <input type="hidden" name="categories" value="general,social media" />
<!-- language --> <input type="hidden" name="lang" value="all" />
<!-- locale --> <input type="hidden" name="locale" value="en" />
<!-- date filter --> <input type="hidden" name="time_range" value="month" />
</form>

View File

@ -15,7 +15,6 @@ Administrator documentation
installation-apache
update-searxng
answer-captcha
searx.favicons
searx.limiter
api
architecture

View File

@ -96,7 +96,7 @@ Modify the ``/etc/searxng/settings.yml`` to your needs:
.. literalinclude:: ../../utils/templates/etc/searxng/settings.yml
:language: yaml
:end-before: # hostnames:
:end-before: # hostname_replace:
To see the entire file jump to :origin:`utils/templates/etc/searxng/settings.yml`
@ -104,7 +104,7 @@ Modify the ``/etc/searxng/settings.yml`` to your needs:
.. literalinclude:: ../../searx/settings.yml
:language: yaml
:end-before: # hostnames:
:end-before: # hostname_replace:
To see the entire file jump to :origin:`searx/settings.yml`

View File

@ -1,251 +0,0 @@
.. _favicons:
========
Favicons
========
.. sidebar:: warning
Don't activate the favicons before reading the documentation.
.. contents::
:depth: 2
:local:
:backlinks: entry
Activating the favicons in SearXNG is very easy, but this **generates a
significantly higher load** in the client/server communication and increases
resources needed on the server.
To mitigate these disadvantages, various methods have been implemented,
including a *cache*. The cache must be parameterized according to your own
requirements and maintained regularly.
To activate favicons in SearXNG's result list, set a default
``favicon_resolver`` in the :ref:`search <settings search>` settings:
.. code:: yaml
search:
favicon_resolver: "duckduckgo"
By default and without any extensions, SearXNG serves these resolvers:
- ``duckduckgo``
- ``allesedv``
- ``google``
- ``yandex``
With the above setting favicons are displayed, the user has the option to
deactivate this feature in his settings. If the user is to have the option of
selecting from several *resolvers*, a further setting is required / but this
setting will be discussed :ref:`later <register resolvers>` in this article,
first we have to setup the favicons cache.
Infrastructure
==============
The infrastructure for providing the favicons essentially consists of three
parts:
- :py:obj:`Favicons-Proxy <.favicons.proxy>` (aka *proxy*)
- :py:obj:`Favicons-Resolvers <.favicons.resolvers>` (aka *resolver*)
- :py:obj:`Favicons-Cache <.favicons.cache>` (aka *cache*)
To protect the privacy of users, the favicons are provided via a *proxy*. This
*proxy* is automatically activated with the above activation of a *resolver*.
Additional requests are required to provide the favicons: firstly, the *proxy*
must process the incoming requests and secondly, the *resolver* must make
outgoing requests to obtain the favicons from external sources.
A *cache* has been developed to massively reduce both, incoming and outgoing
requests. This *cache* is also activated automatically with the above
activation of a *resolver*. In its defaults, however, the *cache* is minimal
and not well suitable for a production environment!
.. _favicon cache setup:
Setting up the cache
====================
To parameterize the *cache* and more settings of the favicons infrastructure, a
TOML_ configuration is created in the file ``/etc/searxng/favicons.toml``.
.. code:: toml
[favicons]
cfg_schema = 1 # config's schema version no.
[favicons.cache]
db_url = "/var/cache/searxng/faviconcache.db" # default: "/tmp/faviconcache.db"
LIMIT_TOTAL_BYTES = 2147483648 # 2 GB / default: 50 MB
# HOLD_TIME = 5184000 # 60 days / default: 30 days
# BLOB_MAX_BYTES = 40960 # 40 KB / default 20 KB
# MAINTENANCE_MODE = "off" # default: "auto"
# MAINTENANCE_PERIOD = 600 # 10min / default: 1h
:py:obj:`cfg_schema <.FaviconConfig.cfg_schema>`:
Is required to trigger any processes required for future upgrades / don't
change it.
:py:obj:`cache.db_url <.FaviconCacheConfig.db_url>`:
The path to the (SQLite_) database file. The default path is in the `/tmp`_
folder, which is deleted on every reboot and is therefore unsuitable for a
production environment. The FHS_ provides the folder for the
application cache
The FHS_ provides the folder `/var/cache`_ for the cache of applications, so a
suitable storage location of SearXNG's caches is folder ``/var/cache/searxng``.
In container systems, a volume should be mounted for this folder and in a
standard installation (compare :ref:`create searxng user`), the folder must be
created and the user under which the SearXNG process is running must be given
write permission to this folder.
.. code:: bash
$ sudo mkdir /var/cache/searxng
$ sudo chown root:searxng /var/cache/searxng/
$ sudo chmod g+w /var/cache/searxng/
:py:obj:`cache.LIMIT_TOTAL_BYTES <.FaviconCacheConfig.LIMIT_TOTAL_BYTES>`:
Maximum of bytes stored in the cache of all blobs. The limit is only reached
at each maintenance interval after which the oldest BLOBs are deleted; the
limit is exceeded during the maintenance period.
.. attention::
If the maintenance period is too long or maintenance is switched
off completely, the cache grows uncontrollably.
SearXNG hosters can change other parameters of the cache as required:
- :py:obj:`cache.HOLD_TIME <.FaviconCacheConfig.HOLD_TIME>`
- :py:obj:`cache.BLOB_MAX_BYTES <.FaviconCacheConfig.BLOB_MAX_BYTES>`
Maintenance of the cache
------------------------
Regular maintenance of the cache is required! By default, regular maintenance
is triggered automatically as part of the client requests:
- :py:obj:`cache.MAINTENANCE_MODE <.FaviconCacheConfig.MAINTENANCE_MODE>` (default ``auto``)
- :py:obj:`cache.MAINTENANCE_PERIOD <.FaviconCacheConfig.MAINTENANCE_PERIOD>` (default ``6000`` / 1h)
As an alternative to maintenance as part of the client request process, it is
also possible to carry out maintenance using an external process. For example,
by creating a :man:`crontab` entry for maintenance:
.. code:: bash
$ python -m searx.favicons cache maintenance
The following command can be used to display the state of the cache:
.. code:: bash
$ python -m searx.favicons cache state
.. _favicon proxy setup:
Proxy configuration
===================
Most of the options of the :py:obj:`Favicons-Proxy <.favicons.proxy>` are
already set sensibly with settings from the :ref:`settings.yml <searxng
settings.yml>` and should not normally be adjusted.
.. code:: toml
[favicons.proxy]
max_age = 5184000 # 60 days / default: 7 days (604800 sec)
:py:obj:`max_age <.FaviconProxyConfig.max_age>`:
The `HTTP Cache-Control max-age`_ response directive indicates that the
response remains fresh until N seconds after the response is generated. This
setting therefore determines how long a favicon remains in the client's cache.
As a rule, in the favicons infrastructure of SearXNG's this setting only
affects favicons whose byte size exceeds :ref:`BLOB_MAX_BYTES <favicon cache
setup>` (the other favicons that are already in the cache are embedded as
`data URL`_ in the :py:obj:`generated HTML <.favicons.proxy.favicon_url>`,
which can greatly reduce the number of additional requests).
.. _register resolvers:
Register resolvers
------------------
A :py:obj:`resolver <.favicon.resolvers>` is a function that obtains the favicon
from an external source. The resolver functions available to the user are
registered with their fully qualified name (FQN_) in a ``resolver_map``.
If no ``resolver_map`` is defined in the ``favicon.toml``, the favicon
infrastructure of SearXNG generates this ``resolver_map`` automatically
depending on the ``settings.yml``. SearXNG would automatically generate the
following TOML configuration from the following YAML configuration:
.. code:: yaml
search:
favicon_resolver: "duckduckgo"
.. code:: toml
[favicons.proxy.resolver_map]
"duckduckgo" = "searx.favicons.resolvers.duckduckgo"
If this automatism is not desired, then (and only then) a separate
``resolver_map`` must be created. For example, to give the user two resolvers to
choose from, the following configuration could be used:
.. code:: toml
[favicons.proxy.resolver_map]
"duckduckgo" = "searx.favicons.resolvers.duckduckgo"
"allesedv" = "searx.favicons.resolvers.allesedv"
# "google" = "searx.favicons.resolvers.google"
# "yandex" = "searx.favicons.resolvers.yandex"
.. note::
With each resolver, the resource requirement increases significantly.
The number of resolvers increases:
- the number of incoming/outgoing requests and
- the number of favicons to be stored in the cache.
In the following we list the resolvers available in the core of SearXNG, but via
the FQN_ it is also possible to implement your own resolvers and integrate them
into the *proxy*:
- :py:obj:`searx.favicons.resolvers.duckduckgo`
- :py:obj:`searx.favicons.resolvers.allesedv`
- :py:obj:`searx.favicons.resolvers.google`
- :py:obj:`searx.favicons.resolvers.yandex`
.. _SQLite:
https://www.sqlite.org/
.. _FHS:
https://refspecs.linuxfoundation.org/FHS_3.0/fhs/index.html
.. _`/var/cache`:
https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch05s05.html
.. _`/tmp`:
https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch03s18.html
.. _TOML:
https://toml.io/en/
.. _HTTP Cache-Control max-age:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
.. _data URL:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs
.. _FQN: https://en.wikipedia.org/wiki/Fully_qualified_name

View File

@ -1,5 +1,3 @@
.. _searxng settings.yml:
========
Settings
========

View File

@ -204,7 +204,7 @@ Example configuration to restrict access to the Arch Linux Wiki engine:
tokens: [ 'my-secret-token' ]
Unless a user has configured the right token, the engine is going to be hidden
from them. It is not going to be included in the list of engines on the
from him/her. It is not going to be included in the list of engines on the
Preferences page and in the output of `/config` REST API call.
Tokens can be added to one's configuration on the Preferences page under "Engine

View File

@ -9,7 +9,6 @@
search:
safe_search: 0
autocomplete: ""
favicon_resolver: ""
default_lang: ""
ban_time_on_fail: 5
max_ban_time_on_fail: 120
@ -42,11 +41,6 @@
- ``qwant``
- ``wikipedia``
``favicon_resolver``:
To activate favicons in SearXNG's result list select a default
favicon-resolver, leave blank to turn off the feature. Don't activate the
favicons before reading the :ref:`Favicons documentation <favicons>`.
``default_lang``:
Default search language - leave blank to detect from browser information or
use codes from :origin:`searx/languages.py`.

View File

@ -31,13 +31,13 @@
``secret_key`` : ``$SEARXNG_SECRET``
Used for cryptography purpose.
``limiter`` : ``$SEARXNG_LIMITER``
``limiter`` :
Rate limit the number of request on the instance, block some bots. The
:ref:`limiter` requires a :ref:`settings redis` database.
.. _public_instance:
``public_instance`` : ``$SEARXNG_PUBLIC_INSTANCE``
``public_instance`` :
Setting that allows to enable features specifically for public instances (not
needed for local usage). By set to ``true`` the following features are
@ -47,7 +47,7 @@
.. _image_proxy:
``image_proxy`` : ``$SEARXNG_IMAGE_PROXY``
``image_proxy`` :
Allow your instance of SearXNG of being able to proxy images. Uses memory space.
.. _HTTP headers: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers

View File

@ -24,7 +24,7 @@
.. _static_use_hash:
``static_use_hash`` : ``$SEARXNG_STATIC_USE_HASH``
``static_use_hash`` :
Enables `cache busting`_ of static files.
``default_locale`` :
@ -58,7 +58,7 @@
Name of the theme you want to use by default on your SearXNG instance.
``theme_args.simple_style``:
Style of simple theme: ``auto``, ``light``, ``dark``, ``black``
Style of simple theme: ``auto``, ``light``, ``dark``
``results_on_new_tab``:
Open result links in a new tab by default.

View File

@ -113,7 +113,7 @@ ${fedora_build}
(${SERVICE_USER})$ command -v python && python --version
$SEARXNG_PYENV/bin/python
Python 3.11.10
Python 3.8.1
# update pip's boilerplate ..
pip install -U pip
@ -123,7 +123,7 @@ ${fedora_build}
# jump to SearXNG's working tree and install SearXNG into virtualenv
(${SERVICE_USER})$ cd \"$SEARXNG_SRC\"
(${SERVICE_USER})$ pip install --use-pep517 --no-build-isolation -e .
(${SERVICE_USER})$ pip install -e .
.. END manage.sh update_packages

View File

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
import sys, os
@ -127,7 +128,6 @@ extensions = [
"sphinx_tabs.tabs", # https://github.com/djungelorm/sphinx-tabs
'myst_parser', # https://www.sphinx-doc.org/en/master/usage/markdown.html
'notfound.extension', # https://github.com/readthedocs/sphinx-notfound-page
'sphinxcontrib.autodoc_pydantic', # https://github.com/mansenfranzen/autodoc_pydantic
]
autodoc_default_options = {

View File

@ -339,8 +339,6 @@ type.
content *(not implemented yet)*
publishedDate :py:class:`datetime.datetime`, time of publish
thumbnail string, url to a small-preview image
length :py:class:`datetime.timedelta`, duration of result
views string, view count in humanized number format
========================= =====================================================
@ -565,7 +563,3 @@ type.
* - source_code_url
- :py:class:`str`
- the location of the project's source code
* - links
- :py:class:`dict`
- additional links in the form of ``{'link_name': 'http://example.com'}``

View File

@ -25,7 +25,7 @@ Relational Database Management System (RDBMS) are supported:
- :ref:`engine sqlite`
- :ref:`engine postgresql`
- :ref:`engine mysql_server` & :ref:`engine mariadb_server`
- :ref:`engine mysql_server`
All of the engines above are just commented out in the :origin:`settings.yml
<searx/settings.yml>`, as you have to set the required attributes for the
@ -119,16 +119,3 @@ MySQL
.. automodule:: searx.engines.mysql_server
:members:
.. _engine mariadb_server:
MariaDB
--------
.. sidebar:: info
- :origin:`mariadb_server.py <searx/engines/mariadb_server.py>`
- ``pip install`` :pypi:`mariadb <mariadb>`
.. automodule:: searx.engines.mariadb_server
:members:

View File

@ -1,13 +0,0 @@
.. _alpinelinux engine:
=====================
Alpine Linux Packages
=====================
.. contents::
:depth: 2
:local:
:backlinks: entry
.. automodule:: searx.engines.alpinelinux
:members:

View File

@ -1,8 +0,0 @@
.. _discourse engine:
================
Discourse Forums
================
.. automodule:: searx.engines.discourse
:members:

View File

@ -1,8 +0,0 @@
.. _gitea geizhals:
========
Geizhals
========
.. automodule:: searx.engines.geizhals
:members:

View File

@ -1,8 +0,0 @@
.. _gitea engine:
=====
Gitea
=====
.. automodule:: searx.engines.gitea
:members:

View File

@ -1,8 +0,0 @@
.. _gitlab engine:
======
GitLab
======
.. automodule:: searx.engines.gitlab
:members:

View File

@ -1,13 +0,0 @@
.. _voidlinux mullvad_leta:
============
Mullvad-Leta
============
.. contents:: Contents
:depth: 2
:local:
:backlinks: entry
.. automodule:: searx.engines.mullvad_leta
:members:

View File

@ -61,7 +61,7 @@ working tree and release a ``make install`` to get a virtualenv with a
$ make install
PYENV [virtualenv] installing ./requirements*.txt into local/py3
...
PYENV [install] pip install --use-pep517 --no-build-isolation -e 'searx[test]'
PYENV [install] pip install -e 'searx[test]'
...
Successfully installed searxng-2023.7.19+a446dea1b
@ -78,7 +78,7 @@ the check fails if you edit the requirements listed in
...
PYENV [virtualenv] installing ./requirements*.txt into local/py3
...
PYENV [install] pip install --use-pep517 --no-build-isolation -e 'searx[test]'
PYENV [install] pip install -e 'searx[test]'
...
Successfully installed searxng-2023.7.19+a446dea1b

View File

@ -103,14 +103,14 @@ Parameters
.. disabled by default
``Hostnames_plugin``, ``Open_Access_DOI_rewrite``,
``Hostname_replace``, ``Open_Access_DOI_rewrite``,
``Vim-like_hotkeys``, ``Tor_check_plugin``
``disabled_plugins``: optional
List of disabled plugins.
:default:
``Hostnames_plugin``, ``Open_Access_DOI_rewrite``,
``Hostname_replace``, ``Open_Access_DOI_rewrite``,
``Vim-like_hotkeys``, ``Tor_check_plugin``
:values:

View File

@ -17,7 +17,7 @@ If you don't trust anyone, you can set up your own, see :ref:`installation`.
- :ref:`no user tracking / no profiling <SearXNG protect privacy>`
- script & cookies are optional
- secure, encrypted connections
- :ref:`about 200 search engines <configured engines>`
- :ref:`about 130 search engines <configured engines>`
- `about 60 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
- about 100 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
- :ref:`easy integration of search engines <demo online engine>`

View File

@ -2,9 +2,9 @@
Why use a private instance?
===========================
.. sidebar:: Is running my own instance worth it?
.. sidebar:: Is it worth to run my own instance?
\.\.\.is a common question among SearXNG users. Before answering this
\.\. is a common question among SearXNG users. Before answering this
question, see what options a SearXNG user has.
.. contents::
@ -12,13 +12,13 @@ Why use a private instance?
:local:
:backlinks: entry
Public instances are open to everyone who has access to their URL. Usually, they
Public instances are open to everyone who has access to its URL. Usually, these
are operated by unknown parties (from the users' point of view). Private
instances can be used by a select group of people, such as a SearXNG instance for a
group of friends, or a company which can be accessed through a VPN. Instances can also be
single-user instances, which run locally on the user's machine.
instances can be used by a select group of people. It is for example a SearXNG of
group of friends or a company which can be accessed through VPN. Also it can be
single user one which runs on the user's laptop.
To gain more insight on how these instances work, let's dive into how SearXNG
To gain more insight on how these instances work let's dive into how SearXNG
protects its users.
.. _SearXNG protect privacy:
@ -26,26 +26,26 @@ protects its users.
How does SearXNG protect privacy?
=================================
SearXNG protects the privacy of its users in multiple ways, regardless of the type
of the instance (private or public). Removal of private data from search requests
SearXNG protects the privacy of its users in multiple ways regardless of the type
of the instance (private, public). Removal of private data from search requests
comes in three forms:
1. Removing private data from requests going to search services
2. Not forwarding anything from third party services through search services
1. removal of private data from requests going to search services
2. not forwarding anything from a third party services through search services
(e.g. advertisement)
3. Removing private data from requests going to the results pages
3. removal of private data from requests going to the result pages
Removing private data means not sending cookies to external search engines and
generating a random browser profile for every request. Thus, it does not matter
if a public or private instance handles the request, because it is anonymized in
both cases. The IP address used will be the IP of the instance, but SearXNG can also be
both cases. IP addresses will be the IP of the instance. But SearXNG can be
configured to use proxy or Tor. `Result proxy
<https://github.com/asciimoo/morty>`__ is supported, too.
SearXNG does not serve ads or tracking content, unlike most search services. Therefore,
SearXNG does not serve ads or tracking content unlike most search services. So
private data is not forwarded to third parties who might monetize it. Besides
protecting users from search services, both the referring page and search query are
hidden from the results pages being visited.
protecting users from search services, both referring page and search query are
hidden from visited result pages.
What are the consequences of using public instances?
@ -53,11 +53,11 @@ What are the consequences of using public instances?
If someone uses a public instance, they have to trust the administrator of that
instance. This means that the user of the public instance does not know whether
their requests are logged, aggregated, and sent or sold to a third party.
their requests are logged, aggregated and sent or sold to a third party.
Also, public instances without proper protection are more vulnerable to abuse of
the search service, which may cause the external service to enforce
CAPTCHAs or to ban the IP address of the instance. Thus, search requests would return less
Also, public instances without proper protection are more vulnerable to abusing
the search service, In this case the external service in exchange returns
CAPTCHAs or bans the IP of the instance. Thus, search requests return less
results.
I see. What about private instances?
@ -67,10 +67,10 @@ If users run their :ref:`own instances <installation>`, everything is in their
control: the source code, logging settings and private data. Unknown instance
administrators do not have to be trusted.
Furthermore, as the default settings of their instance are editable, there is no
need to use cookies to tailor SearXNG to their needs and preferences will not
Furthermore, as the default settings of their instance is editable, there is no
need to use cookies to tailor SearXNG to their needs. So preferences will not be
reset to defaults when clearing browser cookies. As settings are stored on
the user's computer, they will not be accessible to others as long as their computer is
their computer, it will not be accessible to others as long as their computer is
not compromised.
Conclusion
@ -80,7 +80,7 @@ Always use an instance which is operated by people you trust. The privacy
features of SearXNG are available to users no matter what kind of instance they
use.
For those on the go, or just wanting to try SearXNG for the first time, public
instances are the best choice. Public instances are also making the
world a better place by giving those who cannot, or do not want to, run an
instance access to a privacy-respecting search service.
If someone is on the go or just wants to try SearXNG for the first time public
instances are the best choices. Additionally, public instance are making a
world a better place, because those who cannot or do not want to run an
instance, have access to a privacy respecting search service.

View File

@ -1,48 +0,0 @@
.. _favicons source:
=================
Favicons (source)
=================
.. contents::
:depth: 2
:local:
:backlinks: entry
.. automodule:: searx.favicons
:members:
.. _favicons.config:
Favicons Config
===============
.. automodule:: searx.favicons.config
:members:
.. _favicons.proxy:
Favicons Proxy
==============
.. automodule:: searx.favicons.proxy
:members:
.. _favicons.resolver:
Favicons Resolver
=================
.. automodule:: searx.favicons.resolvers
:members:
.. _favicons.cache:
Favicons Cache
==============
.. automodule:: searx.favicons.cache
:members:

View File

@ -1,9 +0,0 @@
.. _hostnames plugin:
================
Hostnames plugin
================
.. automodule:: searx.plugins.hostnames
:members:

View File

@ -1,9 +0,0 @@
.. _unit converter plugin:
=====================
Unit converter plugin
=====================
.. automodule:: searx.plugins.unit_converter
:members:

View File

@ -1,8 +0,0 @@
.. _searx.settings_loader:
===============
Settings Loader
===============
.. automodule:: searx.settings_loader
:members:

View File

@ -1,8 +0,0 @@
.. _sqlite db:
=========
SQLite DB
=========
.. automodule:: searx.sqlitedb
:members:

examples/basic_engine.py (25 changes, new file)
View File

@ -0,0 +1,25 @@
categories = ['general'] # optional
def request(query, params):
'''pre-request callback
params<dict>:
method : POST/GET
headers : {}
data : {} # if method == POST
url : ''
category: 'search category'
pageno : 1 # number of the requested page
'''
params['url'] = 'https://host/%s' % query
return params
def response(resp):
'''post-response callback
resp: requests response object
'''
return [{'url': '', 'title': '', 'content': ''}]
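
An engine module like the one above only takes effect once it is registered in settings.yml. A minimal sketch of such a registration, following the pattern visible in the annas_archive hunk further below (the name and shortcut here are illustrative, and the module would need to live where SearXNG loads engines from):

    - name: basic engine
      engine: basic_engine
      shortcut: be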

manage (37 changes)
View File

@ -41,7 +41,7 @@ PATH="${REPO_ROOT}/node_modules/.bin:${PATH}"
PYOBJECTS="searx"
PY_SETUP_EXTRAS='[test]'
GECKODRIVER_VERSION="v0.35.0"
GECKODRIVER_VERSION="v0.34.0"
# SPHINXOPTS=
BLACK_OPTIONS=("--target-version" "py311" "--line-length" "120" "--skip-string-normalization")
BLACK_TARGETS=("--exclude" "(searx/static|searx/languages.py)" "--include" 'searxng.msg|\.pyi?$' "searx" "searxng_extra" "tests")
@ -52,17 +52,34 @@ if [ -S "${_dev_redis_sock}" ] && [ -z "${SEARXNG_REDIS_URL}" ]; then
export SEARXNG_REDIS_URL="unix://${_dev_redis_sock}?db=0"
fi
pylint.FILES() {
# List files tagged by comment:
#
# # lint: pylint
#
# These py files are linted by test.pylint()
grep -l -r --include \*.py '^#[[:blank:]]*lint:[[:blank:]]*pylint' searx searxng_extra tests
find . -name searxng.msg
}
YAMLLINT_FILES=()
while IFS= read -r line; do
if [ "$line" != "tests/unit/settings/syntaxerror_settings.yml" ]; then
YAMLLINT_FILES+=("$line")
fi
YAMLLINT_FILES+=("$line")
done <<< "$(git ls-files './tests/*.yml' './searx/*.yml' './utils/templates/etc/searxng/*.yml' '.github/*.yml' '.github/*/*.yml')"
RST_FILES=(
'README.rst'
)
PYLINT_SEARXNG_DISABLE_OPTION="\
I,C,R,\
W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
E1136"
PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="traits,supported_languages,language_aliases,logger,categories"
PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"
help() {
nvm.help
cat <<EOF
@ -233,7 +250,7 @@ gecko.driver() {
build_msg INSTALL "geckodriver already installed"
return
fi
PLATFORM="$(python -c 'import platform; print(platform.system().lower(), platform.architecture()[0])')"
PLATFORM="$(python3 -c 'import platform; print(platform.system().lower(), platform.architecture()[0])')"
case "$PLATFORM" in
"linux 32bit" | "linux2 32bit") ARCH="linux32";;
"linux 64bit" | "linux2 64bit") ARCH="linux64";;
@ -299,8 +316,8 @@ pyenv.install() {
( set -e
pyenv
build_msg PYENV "[install] pip install --use-pep517 --no-build-isolation -e 'searx${PY_SETUP_EXTRAS}'"
"${PY_ENV_BIN}/python" -m pip install --use-pep517 --no-build-isolation -e ".${PY_SETUP_EXTRAS}"
build_msg PYENV "[install] pip install -e 'searx${PY_SETUP_EXTRAS}'"
"${PY_ENV_BIN}/python" -m pip install -e ".${PY_SETUP_EXTRAS}"
)
local exit_val=$?
if [ ! $exit_val -eq 0 ]; then
@ -321,6 +338,12 @@ format.python() {
dump_return $?
}
PYLINT_FILES=()
while IFS= read -r line; do
PYLINT_FILES+=("$line")
done <<< "$(pylint.FILES)"
# shellcheck disable=SC2119
main() {

View File

@ -1,6 +1,6 @@
{
"dependencies": {
"eslint": "^9.0.0",
"eslint": "^8.50.0",
"pyright": "^1.1.329"
},
"scripts": {

View File

@ -1,24 +1,22 @@
mock==5.1.0
nose2[coverage_plugin]==0.15.1
nose2[coverage_plugin]==0.14.1
cov-core==1.15.0
black==24.3.0
pylint==3.3.1
black==24.2.0
pylint==3.1.0
splinter==0.21.0
selenium==4.25.0
Pallets-Sphinx-Themes==2.3.0
Sphinx==7.4.7
sphinx-issues==5.0.0
selenium==4.18.1
Pallets-Sphinx-Themes==2.1.1
Sphinx<=7.1.2; python_version == '3.8'
Sphinx==7.2.6; python_version > '3.8'
sphinx-issues==4.0.0
sphinx-jinja==2.0.2
sphinx-tabs==3.4.7
sphinx-tabs==3.4.5
sphinxcontrib-programoutput==0.17
sphinx-autobuild==2024.10.3
sphinx-notfound-page==1.0.4
myst-parser==3.0.1
linuxdoc==20240924
sphinx-autobuild==2021.3.14
sphinx-notfound-page==1.0.0
myst-parser==2.0.0
linuxdoc==20231020
aiounittest==1.4.2
yamllint==1.35.1
wlc==1.15
wlc==1.14
coloredlogs==15.0.1
docutils>=0.21.2
parameterized==0.9.0
autodoc_pydantic==2.2.0

View File

@ -1,21 +1,18 @@
certifi==2024.8.30
babel==2.16.0
certifi==2024.2.2
babel==2.14.0
flask-babel==4.0.0
flask==3.0.3
jinja2==3.1.4
lxml==5.3.0
pygments==2.18.0
flask==3.0.2
jinja2==3.1.3
lxml==5.1.0
pygments==2.17.2
python-dateutil==2.9.0.post0
pyyaml==6.0.2
pyyaml==6.0.1
httpx[http2]==0.24.1
Brotli==1.1.0
uvloop==0.21.0
uvloop==0.19.0
httpx-socks[asyncio]==0.7.7
setproctitle==1.3.3
redis==5.0.8
redis==5.0.2
markdown-it-py==3.0.0
fasttext-predict==0.9.2.2
tomli==2.0.2; python_version < '3.11'
msgspec==0.18.6
eval_type_backport; python_version < '3.9'
typer-slim==0.12.5
pytomlpp==1.0.13

View File

@ -1,5 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, cyclic-import
# lint: pylint
# pylint: disable=missing-module-docstring
import sys
import os

View File

@ -1,30 +1,25 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
import sys
from os import listdir
from os.path import realpath, dirname, join, isdir
from searx.utils import load_module
from collections import defaultdict
from searx.utils import load_module
answerers_dir = dirname(realpath(__file__))
def load_answerers():
answerers = [] # pylint: disable=redefined-outer-name
answerers = []
for filename in listdir(answerers_dir):
if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
continue
module = load_module('answerer.py', join(answerers_dir, filename))
if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not module.keywords:
sys.exit(2)
if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
exit(2)
answerers.append(module)
return answerers
def get_answerers_by_keywords(answerers): # pylint:disable=redefined-outer-name
def get_answerers_by_keywords(answerers):
by_keyword = defaultdict(list)
for answerer in answerers:
for keyword in answerer.keywords:

View File

@ -1,2 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring

View File

@ -1,6 +1,3 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
import hashlib
import random
import string

View File

@ -1,2 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring

View File

@ -1,6 +1,3 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
from functools import reduce
from operator import mul
@ -20,27 +17,27 @@ def answer(query):
try:
args = list(map(float, parts[1:]))
except: # pylint: disable=bare-except
except:
return []
func = parts[0]
_answer = None
answer = None
if func == 'min':
_answer = min(args)
answer = min(args)
elif func == 'max':
_answer = max(args)
answer = max(args)
elif func == 'avg':
_answer = sum(args) / len(args)
answer = sum(args) / len(args)
elif func == 'sum':
_answer = sum(args)
answer = sum(args)
elif func == 'prod':
_answer = reduce(mul, args, 1)
answer = reduce(mul, args, 1)
if _answer is None:
if answer is None:
return []
return [{'answer': str(_answer)}]
return [{'answer': str(answer)}]
# required answerer function

View File

@ -1,11 +1,11 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module implements functions needed for the autocompleter.
"""
# pylint: disable=use-dict-literal
import json
import html
from urllib.parse import urlencode, quote_plus
import lxml
@ -163,7 +163,7 @@ def stract(query, _lang):
if not resp.ok:
return []
return [html.unescape(suggestion['raw']) for suggestion in resp.json()]
return [suggestion['raw'] for suggestion in resp.json()]
def startpage(query, sxng_locale):

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module implements the :origin:`searxng_msg <babel.cfg>` extractor to
extract messages from:

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
""".. _botdetection src:
Implementations used for bot detection.

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring, invalid-name
from __future__ import annotations

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Configuration class :py:class:`Config` with deep-update, schema validation
and deprecated names.
@ -13,8 +14,7 @@ import copy
import typing
import logging
import pathlib
from ..compat import tomllib
import pytomlpp as toml
__all__ = ['Config', 'UNSET', 'SchemaIssue']
@ -62,7 +62,7 @@ class Config:
# init schema
log.debug("load schema file: %s", schema_file)
cfg = cls(cfg_schema=toml_load(schema_file), deprecated=deprecated)
cfg = cls(cfg_schema=toml.load(schema_file), deprecated=deprecated)
if not cfg_file.exists():
log.warning("missing config file: %s", cfg_file)
return cfg
@ -70,7 +70,12 @@ class Config:
# load configuration
log.debug("load config file: %s", cfg_file)
upd_cfg = toml_load(cfg_file)
try:
upd_cfg = toml.load(cfg_file)
except toml.DecodeError as exc:
msg = str(exc).replace('\t', '').replace('\n', ' ')
log.error("%s: %s", cfg_file, msg)
raise
is_valid, issue_list = cfg.validate(upd_cfg)
for msg in issue_list:
@ -172,16 +177,6 @@ class Config:
return getattr(m, name)
def toml_load(file_name):
try:
with open(file_name, "rb") as f:
return tomllib.load(f)
except tomllib.TOMLDecodeError as exc:
msg = str(exc).replace('\t', '').replace('\n', ' ')
log.error("%s: %s", file_name, msg)
raise
# working with dictionaries

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_accept``
----------------------

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_accept_encoding``
-------------------------------

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_accept_language``
-------------------------------

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_connection``
--------------------------

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_user_agent``
--------------------------

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
""".. _botdetection.ip_limit:
Method ``ip_limit``
@ -76,11 +77,11 @@ LONG_MAX = 150
LONG_MAX_SUSPICIOUS = 10
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
API_WINDOW = 3600
API_WONDOW = 3600
"""Time (sec) before sliding window for API requests (format != html) expires."""
API_MAX = 4
"""Maximum requests from one IP in the :py:obj:`API_WINDOW`"""
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
"""Time (sec) before sliding window for one suspicious IP expires."""
@ -103,7 +104,7 @@ def filter_request(
return None
if request.args.get('format', 'html') != 'html':
c = incr_sliding_window(redis_client, 'ip_limit.API_WINDOW:' + network.compressed, API_WINDOW)
c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
if c > API_MAX:
return too_many_requests(network, "too many request in API_WINDOW")

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
""".. _botdetection.ip_lists:
Method ``ip_lists``

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``link_token``
---------------------
@ -28,7 +29,7 @@ And in the HTML template from flask a stylesheet link is needed (the value of
<link rel="stylesheet"
href="{{ url_for('client_token', token=link_token) }}"
type="text/css" >
type="text/css" />
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For

View File

@ -1,18 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Compatibility with older versions"""
# pylint: disable=unused-import
__all__ = [
"tomllib",
]
import sys
# TOML (lib) compatibility
# ------------------------
if sys.version_info >= (3, 11):
import tomllib
else:
import tomli as tomllib

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module holds the *data* created by::
make data.all

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,11 +1,11 @@
{
"versions": [
"123.0",
"122.0"
],
"os": [
"Windows NT 10.0; Win64; x64",
"X11; Linux x86_64"
],
"ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
"versions": [
"132.0",
"131.0"
]
"ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
}
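
For illustration, substituting the first os entry and the newest listed version into the ua template above yields a complete user agent header value:

    Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0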

File diff suppressed because it is too large

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Implementations of the framework for the SearXNG engines.
.. hint::

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Engine's traits are fetched from the origin engines and stored in a JSON file
in the *data folder*. Most often traits are languages and region codes and
their mapping from SearXNG's representation to the representation in the origin
@ -166,7 +167,7 @@ class EngineTraits:
# - name: google italian
# engine: google
# language: it
# region: it-IT # type: ignore
# region: it-IT
traits = self.copy()

View File

@ -1,12 +1,11 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=invalid-name
"""1337x
"""
1337x
"""
from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex
# about
about = {
@ -40,7 +39,9 @@ def response(resp):
title = extract_text(eval_xpath(result, './td[contains(@class, "name")]/a[2]'))
seed = extract_text(eval_xpath(result, './/td[contains(@class, "seeds")]'))
leech = extract_text(eval_xpath(result, './/td[contains(@class, "leeches")]'))
filesize = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()'))
filesize_info = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()'))
filesize, filesize_multiplier = filesize_info.split()
filesize = get_torrent_size(filesize, filesize_multiplier)
results.append(
{

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=invalid-name
"""9GAG (social media)"""

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Load and initialize the ``engines``, see :py:func:`load_engines` and register
:py:obj:`engine_shortcuts`.

View File

@ -74,7 +74,7 @@ def response(resp):
if number_of_results:
try:
results.append({'number_of_results': int(extract_text(number_of_results))})
except: # pylint: disable=bare-except
except:
pass
return results

View File

@ -1,83 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""`Alpine Linux binary packages`_. `Alpine Linux`_ is a Linux-based operation
system designed to be small, simple and secure. Contrary to many other Linux
distributions, it uses musl, BusyBox and OpenRC. Alpine is mostly used on
servers and for Docker images.
.. _Alpine Linux binary packages: https://pkgs.alpinelinux.org
.. _Alpine Linux: https://www.alpinelinux.org
"""
import re
from urllib.parse import urlencode
from lxml import html
from dateutil import parser
from searx.utils import eval_xpath, eval_xpath_list, extract_text
about = {
'website': 'https://www.alpinelinux.org',
'wikidata_id': 'Q4033826',
'use_official_api': False,
'official_api_documentation': None,
'require_api_key': False,
'results': 'HTML',
}
paging = True
categories = ['packages', 'it']
base_url = "https://pkgs.alpinelinux.org"
alpine_arch = 'x86_64'
"""Kernel architecture: ``x86_64``, ``x86``, ``aarch64``, ``armhf``,
``ppc64le``, ``s390x``, ``armv7`` or ``riscv64``"""
ARCH_RE = re.compile("x86_64|x86|aarch64|armhf|ppc64le|s390x|armv7|riscv64")
"""Regular expression to match supported architectures in the query string."""
def request(query, params):
query_arch = ARCH_RE.search(query)
if query_arch:
query_arch = query_arch.group(0)
query = query.replace(query_arch, '').strip()
args = {
# use wildcards to match more than just packages with the exact same
# name as the query
'name': f"*{query}*",
'page': params['pageno'],
'arch': query_arch or alpine_arch,
}
params['url'] = f"{base_url}/packages?{urlencode(args)}"
return params
def response(resp):
results = []
doc = html.fromstring(resp.text)
for result in eval_xpath_list(doc, "//table/tbody/tr"):
if len(result.xpath("./td")) < 9:
# skip non valid entries in the result table
# e.g the "No item found..." message
continue
results.append(
{
'template': 'packages.html',
'url': base_url + extract_text(eval_xpath(result, './td[contains(@class, "package")]/a/@href')),
'title': extract_text(eval_xpath(result, './td[contains(@class, "package")]')),
'package_name': extract_text(eval_xpath(result, './td[contains(@class, "package")]')),
'publishedDate': parser.parse(extract_text(eval_xpath(result, './td[contains(@class, "bdate")]'))),
'version': extract_text(eval_xpath(result, './td[contains(@class, "version")]')),
'homepage': extract_text(eval_xpath(result, './td[contains(@class, "url")]/a/@href')),
'maintainer': extract_text(eval_xpath(result, './td[contains(@class, "maintainer")]')),
'license_name': extract_text(eval_xpath(result, './td[contains(@class, "license")]')),
'tags': [extract_text(eval_xpath(result, './td[contains(@class, "repo")]'))],
}
)
return results

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""`Anna's Archive`_ is a free non-profit online shadow library metasearch
engine providing access to a variety of book resources (also via IPFS), created
by a team of anonymous archivists (AnnaArchivist_).
@ -24,7 +25,7 @@ for *newest* articles and journals (PDF) / by shortcut ``!aaa <search-term>``.
- name: annas articles
engine: annas_archive
shortcut: aaa
aa_content: 'magazine'
aa_content: 'journal_article'
aa_ext: 'pdf'
aa_sort: 'newest'
@ -60,7 +61,7 @@ base_url: str = "https://annas-archive.org"
aa_content: str = ""
"""Anan's search form field **Content** / possible values::
book_fiction, book_unknown, book_nonfiction,
journal_article, book_any, book_fiction, book_unknown, book_nonfiction,
book_comic, magazine, standards_document
To not filter use an empty string (default).
@ -133,7 +134,7 @@ def _get_result(item):
'publisher': extract_text(eval_xpath(item, './/div[contains(@class, "text-sm")]')),
'authors': [extract_text(eval_xpath(item, './/div[contains(@class, "italic")]'))],
'content': extract_text(eval_xpath(item, './/div[contains(@class, "text-xs")]')),
'thumbnail': item.xpath('.//img/@src')[0],
'img_src': item.xpath('.//img/@src')[0],
}
@ -184,8 +185,3 @@ def fetch_traits(engine_traits: EngineTraits):
for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
engine_traits.custom['sort'].append(x.get("value"))
# for better diff; sort the persistence of these traits
engine_traits.custom['content'].sort()
engine_traits.custom['ext'].sort()
engine_traits.custom['sort'].sort()

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""APKMirror
"""
@ -53,8 +54,8 @@ def response(resp):
url = base_url + link.attrib.get('href') + '#downloads'
title = extract_text(link)
thumbnail = base_url + eval_xpath_getindex(result, './/img/@src', 0)
res = {'url': url, 'title': title, 'thumbnail': thumbnail}
img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0)
res = {'url': url, 'title': title, 'img_src': img_src}
results.append(res)

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Apple App Store
# lint: pylint
"""
Apple App Store
"""
from json import loads
@ -47,7 +48,7 @@ def response(resp):
'url': result['trackViewUrl'],
'title': result['trackName'],
'content': result['description'],
'thumbnail': result['artworkUrl100'],
'img_src': result['artworkUrl100'],
'publishedDate': parse(result['currentVersionReleaseDate']),
'author': result['sellerName'],
}

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Apple Maps"""
from json import loads

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Arch Linux Wiki
~~~~~~~~~~~~~~~

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""The Art Institute of Chicago
Explore thousands of artworks from The Art Institute of Chicago.

View File

@ -1,12 +1,11 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""ArXiV (Scientific preprints)
"""
from datetime import datetime
ArXiV (Scientific preprints)
"""
from lxml import etree
from lxml.etree import XPath
from datetime import datetime
from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex
# about
@ -51,7 +50,7 @@ def request(query, params):
# basic search
offset = (params['pageno'] - 1) * number_of_results
string_args = {'query': query, 'offset': offset, 'number_of_results': number_of_results}
string_args = dict(query=query, offset=offset, number_of_results=number_of_results)
params['url'] = base_url.format(**string_args)

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Ask.com"""
from urllib.parse import urlencode
@ -19,7 +20,6 @@ about = {
# Engine Configuration
categories = ['general']
paging = True
max_page = 5
# Base URL
base_url = "https://www.ask.com/web"
@ -62,11 +62,11 @@ def response(resp):
results.append(
{
"url": item['url'].split('&ueid')[0],
"url": item['url'],
"title": item['title'],
"content": item['abstract'],
"publishedDate": pubdate_original,
# "thumbnail": item.get('image_url') or None, # these are not thumbs / to large
# "img_src": item.get('image_url') or None, # these are not thumbs / to large
"metadata": ' | '.join(metadata),
}
)

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bandcamp (Music)
@website https://bandcamp.com/
@ -37,6 +38,16 @@ iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/
def request(query, params):
'''pre-request callback
params<dict>:
method : POST/GET
headers : {}
data : {} # if method == POST
url : ''
category: 'search category'
pageno : 1 # number of the requested page
'''
search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno'])
params['url'] = base_url + search_path
@ -44,7 +55,10 @@ def request(query, params):
def response(resp):
'''post-response callback
resp: requests response object
'''
results = []
dom = html.fromstring(resp.text)
@ -68,7 +82,7 @@ def response(resp):
thumbnail = result.xpath('.//div[@class="art"]/img/@src')
if thumbnail:
new_result['thumbnail'] = thumbnail[0]
new_result['img_src'] = thumbnail[0]
result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower()

View File

@ -1,12 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""BASE (Scholar publications)
"""
from datetime import datetime
import re
BASE (Scholar publications)
"""
from urllib.parse import urlencode
from lxml import etree
from datetime import datetime
import re
from searx.utils import searx_useragent
# about
@ -55,17 +55,13 @@ shorcut_dict = {
def request(query, params):
# replace shortcuts with API advanced search keywords
for key, val in shorcut_dict.items():
query = re.sub(key, val, query)
for key in shorcut_dict.keys():
query = re.sub(key, shorcut_dict[key], query)
# basic search
offset = (params['pageno'] - 1) * number_of_results
string_args = {
'query': urlencode({'query': query}),
'offset': offset,
'hits': number_of_results,
}
string_args = dict(query=urlencode({'query': query}), offset=offset, hits=number_of_results)
params['url'] = base_url.format(**string_args)
@ -80,10 +76,8 @@ def response(resp):
for entry in search_results.xpath('./result/doc'):
content = "No description available"
url = ""
title = ""
date = datetime.now() # needed in case no dcdate is available for an item
date = datetime.now() # needed in case no dcdate is available for an item
for item in entry:
if item.attrib["name"] == "dcdate":
date = item.text
@ -105,7 +99,7 @@ def response(resp):
try:
publishedDate = datetime.strptime(date, date_format)
break
except: # pylint: disable=bare-except
except:
pass
if publishedDate is not None:

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bilibili is a Chinese video sharing website.
.. _Bilibili: https://www.bilibili.com
@ -9,8 +10,6 @@ import string
from urllib.parse import urlencode
from datetime import datetime, timedelta
from searx import utils
# Engine metadata
about = {
"website": "https://www.bilibili.com",
@ -58,8 +57,6 @@ def request(query, params):
# Format the video duration
def format_duration(duration):
if not ":" in duration:
return None
minutes, seconds = map(int, duration.split(":"))
total_seconds = minutes * 60 + seconds
@ -74,7 +71,7 @@ def response(resp):
results = []
for item in search_res.get("data", {}).get("result", []):
title = utils.html_to_text(item["title"])
title = item["title"]
url = item["arcurl"]
thumbnail = item["pic"]
description = item["description"]
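
The newer side runs titles through `searx.utils.html_to_text` because Bilibili wraps matched keywords in highlight markup; the older side handed raw HTML to the template. A short sketch (the sample string is invented):

from searx.utils import html_to_text

# strips the <em> highlight tags Bilibili injects around matched keywords
title = html_to_text('Rust <em class="keyword">tutorial</em> part 1')
print(title)  # -> Rust tutorial part 1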

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This is the implementation of the Bing-WEB engine. Some of this
implementations are shared by other engines:

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bing-Images: description see :py:obj:`searx.engines.bing`.
"""
# pylint: disable=invalid-name
@ -99,7 +100,7 @@ def response(resp):
'url': metadata['purl'],
'thumbnail_src': metadata['turl'],
'img_src': metadata['murl'],
'content': metadata.get('desc'),
'content': metadata['desc'],
'title': title,
'source': source,
'resolution': img_format[0],
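
The only behavioral change in this hunk is `metadata.get('desc')` versus `metadata['desc']`: with `.get`, an item lacking a description yields `None` instead of raising a `KeyError` that would abort the whole parser. Illustration with an invented metadata item:

metadata = {'purl': 'https://example.org/img'}  # invented item, no 'desc' key

print(metadata.get('desc'))  # -> None; the result survives, just without text
try:
    metadata['desc']
except KeyError:
    print('subscript access would abort the whole response parser')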

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bing-News: description see :py:obj:`searx.engines.bing`.
.. hint::
@ -123,16 +124,14 @@ def response(resp):
thumbnail = None
imagelink = eval_xpath_getindex(newsitem, './/a[@class="imagelink"]//img', 0, None)
if imagelink is not None:
thumbnail = imagelink.attrib.get('src')
if not thumbnail.startswith("https://www.bing.com"):
thumbnail = 'https://www.bing.com/' + thumbnail
thumbnail = 'https://www.bing.com/' + imagelink.attrib.get('src')
results.append(
{
'url': url,
'title': title,
'content': content,
'thumbnail': thumbnail,
'img_src': thumbnail,
'metadata': metadata,
}
)
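
The newer hunk only prepends the Bing origin when the `src` attribute is not already absolute; the older code prepended unconditionally and could mangle absolute URLs. A sketch of the newer logic (sample values invented):

def absolutize(src: str) -> str:
    # imagelink 'src' attributes are sometimes relative and sometimes
    # already absolute; only the relative form gets the origin prepended
    if not src.startswith('https://www.bing.com'):
        src = 'https://www.bing.com/' + src
    return src

print(absolutize('th?id=OIP.abc'))                       # gets prefixed
print(absolutize('https://www.bing.com/th?id=OIP.abc'))  # left unchanged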

View File

@ -1,7 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=invalid-name
# lint: pylint
"""Bing-Videos: description see :py:obj:`searx.engines.bing`.
"""
# pylint: disable=invalid-name
from typing import TYPE_CHECKING
import json

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""BPB refers to ``Bundeszentrale für poltische Bildung``, which is a German
governmental institution aiming to reduce misinformation by providing resources
about politics and history.
@ -40,9 +41,9 @@ def response(resp):
json_resp = resp.json()
for result in json_resp['teaser']:
thumbnail = None
img_src = None
if result['teaser']['image']:
thumbnail = base_url + result['teaser']['image']['sources'][-1]['url']
img_src = base_url + result['teaser']['image']['sources'][-1]['url']
metadata = result['extension']['overline']
authors = ', '.join(author['name'] for author in result['extension'].get('authors', []))
@ -58,7 +59,7 @@ def response(resp):
'url': base_url + result['teaser']['link']['url'],
'title': result['teaser']['title'],
'content': result['teaser']['text'],
'thumbnail': thumbnail,
'img_src': img_src,
'publishedDate': publishedDate,
'metadata': metadata,
}

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Brave supports the categories listed in :py:obj:`brave_category` (General,
news, videos, images). Support for :py:obj:`paging` and :py:obj:`time range
<time_range_support>` is limited (see remarks).
@ -123,6 +124,7 @@ from typing import Any, TYPE_CHECKING
from urllib.parse import (
urlencode,
urlparse,
parse_qs,
)
from dateutil import parser
@ -131,12 +133,10 @@ from lxml import html
from searx import locales
from searx.utils import (
extract_text,
extr,
eval_xpath,
eval_xpath_list,
eval_xpath_getindex,
js_variable_to_python,
get_embeded_stream_url,
)
from searx.enginelib.traits import EngineTraits
@ -253,7 +253,11 @@ def response(resp):
if brave_category in ('search', 'goggles'):
return _parse_search(resp)
datastr = extr(resp.text, "const data = ", ";\n").strip()
datastr = ""
for line in resp.text.split("\n"):
if "const data = " in line:
datastr = line.replace("const data = ", "").strip()[:-1]
break
json_data = js_variable_to_python(datastr)
json_resp = json_data[1]['data']['body']['response']
@ -293,14 +297,14 @@ def _parse_search(resp):
content_tag = eval_xpath_getindex(result, './/div[contains(@class, "snippet-description")]', 0, default='')
pub_date_raw = eval_xpath(result, 'substring-before(.//div[contains(@class, "snippet-description")], "-")')
thumbnail = eval_xpath_getindex(result, './/img[contains(@class, "thumb")]/@src', 0, default='')
img_src = eval_xpath_getindex(result, './/img[contains(@class, "thumb")]/@src', 0, default='')
item = {
'url': url,
'title': extract_text(title_tag),
'content': extract_text(content_tag),
'publishedDate': _extract_published_date(pub_date_raw),
'thumbnail': thumbnail,
'img_src': img_src,
}
video_tag = eval_xpath_getindex(
@ -311,7 +315,7 @@ def _parse_search(resp):
# In my tests a video tag in the WEB search was most often not a
# video, except the ones from youtube ..
iframe_src = get_embeded_stream_url(url)
iframe_src = _get_iframe_src(url)
if iframe_src:
item['iframe_src'] = iframe_src
item['template'] = 'videos.html'
@ -321,13 +325,22 @@ def _parse_search(resp):
)
item['publishedDate'] = _extract_published_date(pub_date_raw)
else:
item['thumbnail'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')
item['img_src'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')
result_list.append(item)
return result_list
def _get_iframe_src(url):
parsed_url = urlparse(url)
if parsed_url.path == '/watch' and parsed_url.query:
video_id = parse_qs(parsed_url.query).get('v', []) # type: ignore
if video_id:
return 'https://www.youtube-nocookie.com/embed/' + video_id[0] # type: ignore
return None
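
The fallback `_get_iframe_src` on the older side only recognizes YouTube-style watch URLs; the newer side delegates to the shared `get_embeded_stream_url` helper instead. A self-contained mirror of the fallback's behavior (example URLs invented):

from urllib.parse import urlparse, parse_qs

def iframe_src(url: str):
    # only YouTube-style /watch?v=... URLs get a privacy-friendly
    # nocookie embed; everything else returns None
    parsed = urlparse(url)
    if parsed.path == '/watch' and parsed.query:
        video_id = parse_qs(parsed.query).get('v', [])
        if video_id:
            return 'https://www.youtube-nocookie.com/embed/' + video_id[0]
    return None

print(iframe_src('https://www.youtube.com/watch?v=dQw4w9WgXcQ'))
# -> https://www.youtube-nocookie.com/embed/dQw4w9WgXcQ
print(iframe_src('https://example.org/article'))  # -> None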
def _parse_news(json_resp):
result_list = []
@ -339,7 +352,7 @@ def _parse_news(json_resp):
'publishedDate': _extract_published_date(result['age']),
}
if result['thumbnail'] is not None:
item['thumbnail'] = result['thumbnail']['src']
item['img_src'] = result['thumbnail']['src']
result_list.append(item)
return result_list
@ -383,7 +396,7 @@ def _parse_videos(json_resp):
if result['thumbnail'] is not None:
item['thumbnail'] = result['thumbnail']['src']
iframe_src = get_embeded_stream_url(url)
iframe_src = _get_iframe_src(url)
if iframe_src:
item['iframe_src'] = iframe_src
@ -417,15 +430,14 @@ def fetch_traits(engine_traits: EngineTraits):
print("ERROR: response from Brave is not OK.")
dom = html.fromstring(resp.text) # type: ignore
for option in dom.xpath('//section//option[@value="en-us"]/../option'):
for option in dom.xpath('//div[@id="language-select"]//option'):
ui_lang = option.get('value')
try:
l = babel.Locale.parse(ui_lang, sep='-')
if l.territory:
if '-' in ui_lang:
sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-'))
else:
sxng_tag = language_tag(babel.Locale.parse(ui_lang, sep='-'))
sxng_tag = language_tag(babel.Locale.parse(ui_lang))
except babel.UnknownLocaleError:
print("ERROR: can't determine babel locale of Brave's (UI) language %s" % ui_lang)
@ -445,7 +457,7 @@ def fetch_traits(engine_traits: EngineTraits):
if not resp.ok: # type: ignore
print("ERROR: response from Brave is not OK.")
country_js = resp.text[resp.text.index("options:{all") + len('options:') :] # type: ignore
country_js = resp.text[resp.text.index("options:{all") + len('options:') :]
country_js = country_js[: country_js.index("},k={default")]
country_tags = js_variable_to_python(country_js)
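
The newer side replaces the line-by-line scan for `const data = ` with `searx.utils.extr`. A minimal stand-in with the same begin/end semantics (a sketch, not the library's exact implementation):

def extr(text: str, begin: str, end: str, default: str = '') -> str:
    # return the substring between the first `begin` and the next `end` after it
    try:
        start = text.index(begin) + len(begin)
        return text[start:text.index(end, start)]
    except ValueError:
        return default

page = 'window.x = 1;\nconst data = [1,{"data":{"body":{}}}];\n'
print(extr(page, 'const data = ', ';\n').strip())
# -> [1,{"data":{"body":{}}}]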

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""BT4G_ (bt4g.com) is not a tracker and doesn't store any content and only
collects torrent metadata (such as file names and file sizes) and a magnet link
(torrent identifier).
@ -36,11 +37,14 @@ Implementations
"""
import re
from datetime import datetime
from urllib.parse import quote
from lxml import etree
from searx.utils import get_torrent_size
# about
about = {
"website": 'https://bt4gprx.com',
@ -100,6 +104,8 @@ def response(resp):
title = entry.find("title").text
link = entry.find("guid").text
fullDescription = entry.find("description").text.split('<br>')
filesize = fullDescription[1]
filesizeParsed = re.split(r"([A-Z]+)", filesize)
magnetlink = entry.find("link").text
pubDate = entry.find("pubDate").text
results.append(
@ -109,7 +115,7 @@ def response(resp):
'magnetlink': magnetlink,
'seed': 'N/A',
'leech': 'N/A',
'filesize': fullDescription[1],
'filesize': get_torrent_size(filesizeParsed[0], filesizeParsed[1]),
'publishedDate': datetime.strptime(pubDate, '%a,%d %b %Y %H:%M:%S %z'),
'template': 'torrent.html',
}
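
The older hunk splits a human-readable size such as `1.5GB` into number and unit for `get_torrent_size`; the capturing group in `re.split` keeps the unit in the result list. A sketch (the sample size is invented, in the format of BT4G's description field):

import re

filesize = '1.5GB'
parts = re.split(r'([A-Z]+)', filesize)
print(parts)  # -> ['1.5', 'GB', ''] -- the capturing group preserves the unit

number, unit = parts[0], parts[1]
print(number, unit)  # -> 1.5 GB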

Some files were not shown because too many files have changed in this diff Show More