mirror of
https://github.com/searxng/searxng.git
synced 2025-12-22 19:50:00 +00:00
[mod] limiter: trusted proxies (#4911)
Replaces `x_for` functionality with `trusted_proxies`. This allows defining which IP / ranges to trust extracting the client IP address from X-Forwarded-For and X-Real-IP headers. We don't know if the proxy chain will give us the proper client address (REMOTE_ADDR in the WSGI environment), so we rely on reading the headers of the proxy before SearXNG (if there is one, in that case it must be added to trusted_proxies) hoping it has done the proper checks. In case a proxy in the chain does not check the client address correctly, integrity is compromised and this should be fixed by whoever manages the proxy, not us. Closes: - https://github.com/searxng/searxng/issues/4940 - https://github.com/searxng/searxng/issues/4939 - https://github.com/searxng/searxng/issues/4907 - https://github.com/searxng/searxng/issues/3632 - https://github.com/searxng/searxng/issues/3191 - https://github.com/searxng/searxng/issues/1237 Related: - https://github.com/searxng/searxng-docker/issues/386 - https://github.com/inetol-infrastructure/searxng-container/issues/81
This commit is contained in:
@@ -4,19 +4,22 @@
|
||||
Implementations used for bot detection.
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = ["init", "dump_request", "get_network", "too_many_requests", "ProxyFix"]
|
||||
|
||||
|
||||
import valkey
|
||||
|
||||
from ._helpers import dump_request
|
||||
from ._helpers import get_real_ip
|
||||
from ._helpers import get_network
|
||||
from ._helpers import too_many_requests
|
||||
|
||||
__all__ = ['dump_request', 'get_network', 'get_real_ip', 'too_many_requests']
|
||||
|
||||
valkey_client = None
|
||||
cfg = None
|
||||
from . import config
|
||||
from . import valkeydb
|
||||
from .trusted_proxies import ProxyFix
|
||||
|
||||
|
||||
def init(_cfg, _valkey_client):
|
||||
global valkey_client, cfg # pylint: disable=global-statement
|
||||
valkey_client = _valkey_client
|
||||
cfg = _cfg
|
||||
def init(cfg: config.Config, valkey_client: valkey.Valkey | None):
    """Initialize the botdetection package.

    Registers ``cfg`` as the global botdetection configuration.  If a
    ``valkey_client`` is given, it is registered as the global Valkey client;
    otherwise no client is registered.
    """
    config.set_global_cfg(cfg)

    # without a database connection there is nothing more to set up
    if not valkey_client:
        return
    valkeydb.set_valkey_client(valkey_client)
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# pylint: disable=missing-module-docstring, invalid-name
|
||||
from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
__all__ = ["log_error_only_once", "dump_request", "get_network", "logger", "too_many_requests"]
|
||||
|
||||
from ipaddress import (
|
||||
IPv4Network,
|
||||
@@ -8,20 +11,19 @@ from ipaddress import (
|
||||
IPv4Address,
|
||||
IPv6Address,
|
||||
ip_network,
|
||||
ip_address,
|
||||
)
|
||||
import flask
|
||||
import werkzeug
|
||||
|
||||
from searx import logger
|
||||
from searx.extended_types import SXNG_Request
|
||||
|
||||
from . import config
|
||||
if t.TYPE_CHECKING:
|
||||
from . import config
|
||||
|
||||
logger = logger.getChild('botdetection')
|
||||
|
||||
|
||||
def dump_request(request: SXNG_Request):
|
||||
def dump_request(request: flask.Request):
|
||||
return (
|
||||
request.path
|
||||
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
|
||||
@@ -52,86 +54,33 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
|
||||
|
||||
|
||||
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
|
||||
"""Returns the (client) network of whether the real_ip is part of."""
|
||||
"""Returns the (client) network that the ``real_ip`` is part of.
|
||||
|
||||
The ``ipv4_prefix`` and ``ipv6_prefix`` define the number of leading bits in
|
||||
an address that are compared to determine whether or not an address is part
|
||||
of a (client) network.
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[botdetection]
|
||||
|
||||
ipv4_prefix = 32
|
||||
ipv6_prefix = 48
|
||||
|
||||
"""
|
||||
|
||||
prefix: int = cfg["botdetection.ipv4_prefix"]
|
||||
if real_ip.version == 6:
|
||||
prefix = cfg['real_ip.ipv6_prefix']
|
||||
else:
|
||||
prefix = cfg['real_ip.ipv4_prefix']
|
||||
prefix: int = cfg["botdetection.ipv6_prefix"]
|
||||
network = ip_network(f"{real_ip}/{prefix}", strict=False)
|
||||
# logger.debug("get_network(): %s", network.compressed)
|
||||
return network
|
||||
|
||||
|
||||
_logged_errors = []
|
||||
_logged_errors: list[str] = []
|
||||
|
||||
|
||||
def _log_error_only_once(err_msg):
|
||||
def log_error_only_once(err_msg: str):
|
||||
if err_msg not in _logged_errors:
|
||||
logger.error(err_msg)
|
||||
_logged_errors.append(err_msg)
|
||||
|
||||
|
||||
def get_real_ip(request: SXNG_Request) -> str:
|
||||
"""Returns real IP of the request. Since not all proxies set all the HTTP
|
||||
headers and incoming headers can be faked it may happen that the IP cannot
|
||||
be determined correctly.
|
||||
|
||||
.. sidebar:: :py:obj:`flask.Request.remote_addr`
|
||||
|
||||
SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
|
||||
|
||||
This function tries to get the remote IP in the order listed below,
|
||||
additional some tests are done and if inconsistencies or errors are
|
||||
detected, they are logged.
|
||||
|
||||
The remote IP of the request is taken from (first match):
|
||||
|
||||
- X-Forwarded-For_ header
|
||||
- `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
|
||||
- :py:obj:`flask.Request.remote_addr`
|
||||
|
||||
.. _ProxyFix:
|
||||
https://werkzeug.palletsprojects.com/middleware/proxy_fix/
|
||||
|
||||
.. _X-Forwarded-For:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||
|
||||
"""
|
||||
|
||||
forwarded_for = request.headers.get("X-Forwarded-For")
|
||||
real_ip = request.headers.get('X-Real-IP')
|
||||
remote_addr = request.remote_addr
|
||||
# logger.debug(
|
||||
# "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
|
||||
# )
|
||||
|
||||
if not forwarded_for:
|
||||
_log_error_only_once("X-Forwarded-For header is not set!")
|
||||
else:
|
||||
from . import cfg # pylint: disable=import-outside-toplevel, cyclic-import
|
||||
|
||||
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
|
||||
x_for: int = cfg['real_ip.x_for'] # type: ignore
|
||||
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
|
||||
|
||||
if not real_ip:
|
||||
_log_error_only_once("X-Real-IP header is not set!")
|
||||
|
||||
if forwarded_for and real_ip and forwarded_for != real_ip:
|
||||
logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
|
||||
|
||||
if forwarded_for and remote_addr and forwarded_for != remote_addr:
|
||||
logger.warning(
|
||||
"IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
|
||||
)
|
||||
|
||||
if real_ip and remote_addr and real_ip != remote_addr:
|
||||
logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
|
||||
|
||||
request_ip = ip_address(forwarded_for or real_ip or remote_addr or '0.0.0.0')
|
||||
if request_ip.version == 6 and request_ip.ipv4_mapped:
|
||||
request_ip = request_ip.ipv4_mapped
|
||||
|
||||
# logger.debug("get_real_ip() -> %s", request_ip)
|
||||
return str(request_ip)
|
||||
|
||||
@@ -7,19 +7,32 @@ structured dictionaries. The configuration schema is defined in a dictionary
|
||||
structure and the configuration data is given in a dictionary structure.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import Any
|
||||
import typing
|
||||
|
||||
import copy
|
||||
import typing
|
||||
import logging
|
||||
import pathlib
|
||||
|
||||
from ..compat import tomllib
|
||||
|
||||
__all__ = ['Config', 'UNSET', 'SchemaIssue']
|
||||
__all__ = ['Config', 'UNSET', 'SchemaIssue', 'set_global_cfg', 'get_global_cfg']
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
CFG: Config | None = None
"""Global config of the botdetection."""


def set_global_cfg(cfg: Config):
    """Register ``cfg`` as the global configuration of the botdetection."""
    # the module-level name is the single place where the config is stored
    global CFG  # pylint: disable=global-statement
    CFG = cfg


def get_global_cfg() -> Config:
    """Return the global configuration of the botdetection.

    :raises ValueError: if no configuration has been registered by
        :py:obj:`set_global_cfg` yet.
    """
    current = CFG
    if current is not None:
        return current
    raise ValueError("Botdetection's config is not yet initialized.")
|
||||
|
||||
|
||||
class FALSE:
|
||||
"""Class of ``False`` singleton"""
|
||||
@@ -57,7 +70,7 @@ class Config:
|
||||
UNSET = UNSET
|
||||
|
||||
@classmethod
|
||||
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
|
||||
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> Config:
|
||||
|
||||
# init schema
|
||||
|
||||
@@ -80,7 +93,7 @@ class Config:
|
||||
cfg.update(upd_cfg)
|
||||
return cfg
|
||||
|
||||
def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
|
||||
def __init__(self, cfg_schema: dict[str, typing.Any], deprecated: dict[str, str]):
|
||||
"""Constructor of class Config.
|
||||
|
||||
:param cfg_schema: Schema of the configuration
|
||||
@@ -93,10 +106,10 @@ class Config:
|
||||
self.deprecated = deprecated
|
||||
self.cfg = copy.deepcopy(cfg_schema)
|
||||
|
||||
def __getitem__(self, key: str) -> Any:
|
||||
def __getitem__(self, key: str) -> typing.Any:
|
||||
return self.get(key)
|
||||
|
||||
def validate(self, cfg: dict):
|
||||
def validate(self, cfg: dict[str, typing.Any]):
|
||||
"""Validation of dictionary ``cfg`` on :py:obj:`Config.SCHEMA`.
|
||||
Validation is done by :py:obj:`validate`."""
|
||||
|
||||
@@ -111,7 +124,7 @@ class Config:
|
||||
"""Returns default value of field ``name`` in ``self.cfg_schema``."""
|
||||
return value(name, self.cfg_schema)
|
||||
|
||||
def get(self, name: str, default: Any = UNSET, replace: bool = True) -> Any:
|
||||
def get(self, name: str, default: typing.Any = UNSET, replace: bool = True) -> typing.Any:
|
||||
"""Returns the value to which ``name`` points in the configuration.
|
||||
|
||||
If there is no such ``name`` in the config and the ``default`` is
|
||||
@@ -214,8 +227,8 @@ def value(name: str, data_dict: dict):
|
||||
|
||||
|
||||
def validate(
|
||||
schema_dict: typing.Dict, data_dict: typing.Dict, deprecated: typing.Dict[str, str]
|
||||
) -> typing.Tuple[bool, list]:
|
||||
schema_dict: dict[str, typing.Any], data_dict: dict[str, typing.Any], deprecated: dict[str, str]
|
||||
) -> tuple[bool, list[str]]:
|
||||
"""Deep validation of dictionary in ``data_dict`` against dictionary in
|
||||
``schema_dict``. Argument deprecated is a dictionary that maps deprecated
|
||||
configuration names to a messages::
|
||||
|
||||
@@ -20,8 +20,7 @@ from ipaddress import (
|
||||
)
|
||||
|
||||
import werkzeug
|
||||
|
||||
from searx.extended_types import SXNG_Request
|
||||
import flask
|
||||
|
||||
from . import config
|
||||
from ._helpers import too_many_requests
|
||||
@@ -29,7 +28,7 @@ from ._helpers import too_many_requests
|
||||
|
||||
def filter_request(
|
||||
network: IPv4Network | IPv6Network,
|
||||
request: SXNG_Request,
|
||||
request: flask.Request,
|
||||
cfg: config.Config, # pylint: disable=unused-argument
|
||||
) -> werkzeug.Response | None:
|
||||
|
||||
|
||||
@@ -21,8 +21,7 @@ from ipaddress import (
|
||||
)
|
||||
|
||||
import werkzeug
|
||||
|
||||
from searx.extended_types import SXNG_Request
|
||||
import flask
|
||||
|
||||
from . import config
|
||||
from ._helpers import too_many_requests
|
||||
@@ -30,7 +29,7 @@ from ._helpers import too_many_requests
|
||||
|
||||
def filter_request(
|
||||
network: IPv4Network | IPv6Network,
|
||||
request: SXNG_Request,
|
||||
request: flask.Request,
|
||||
cfg: config.Config, # pylint: disable=unused-argument
|
||||
) -> werkzeug.Response | None:
|
||||
|
||||
|
||||
@@ -18,8 +18,7 @@ from ipaddress import (
|
||||
)
|
||||
|
||||
import werkzeug
|
||||
|
||||
from searx.extended_types import SXNG_Request
|
||||
import flask
|
||||
|
||||
from . import config
|
||||
from ._helpers import too_many_requests
|
||||
@@ -27,7 +26,7 @@ from ._helpers import too_many_requests
|
||||
|
||||
def filter_request(
|
||||
network: IPv4Network | IPv6Network,
|
||||
request: SXNG_Request,
|
||||
request: flask.Request,
|
||||
cfg: config.Config, # pylint: disable=unused-argument
|
||||
) -> werkzeug.Response | None:
|
||||
if request.headers.get('Accept-Language', '').strip() == '':
|
||||
|
||||
@@ -18,8 +18,7 @@ from ipaddress import (
|
||||
)
|
||||
|
||||
import werkzeug
|
||||
|
||||
from searx.extended_types import SXNG_Request
|
||||
import flask
|
||||
|
||||
from . import config
|
||||
from ._helpers import too_many_requests
|
||||
@@ -27,7 +26,7 @@ from ._helpers import too_many_requests
|
||||
|
||||
def filter_request(
|
||||
network: IPv4Network | IPv6Network,
|
||||
request: SXNG_Request,
|
||||
request: flask.Request,
|
||||
cfg: config.Config, # pylint: disable=unused-argument
|
||||
) -> werkzeug.Response | None:
|
||||
|
||||
|
||||
@@ -32,8 +32,6 @@ import re
|
||||
import flask
|
||||
import werkzeug
|
||||
|
||||
from searx.extended_types import SXNG_Request
|
||||
|
||||
from . import config
|
||||
from ._helpers import logger
|
||||
|
||||
@@ -78,7 +76,7 @@ def is_browser_supported(user_agent: str) -> bool:
|
||||
|
||||
def filter_request(
|
||||
network: IPv4Network | IPv6Network,
|
||||
request: SXNG_Request,
|
||||
request: flask.Request,
|
||||
cfg: config.Config,
|
||||
) -> werkzeug.Response | None:
|
||||
|
||||
|
||||
@@ -20,8 +20,7 @@ from ipaddress import (
|
||||
)
|
||||
|
||||
import werkzeug
|
||||
|
||||
from searx.extended_types import SXNG_Request
|
||||
import flask
|
||||
|
||||
from . import config
|
||||
from ._helpers import too_many_requests
|
||||
@@ -56,7 +55,7 @@ def regexp_user_agent():
|
||||
|
||||
def filter_request(
|
||||
network: IPv4Network | IPv6Network,
|
||||
request: SXNG_Request,
|
||||
request: flask.Request,
|
||||
cfg: config.Config, # pylint: disable=unused-argument
|
||||
) -> werkzeug.Response | None:
|
||||
|
||||
|
||||
@@ -45,12 +45,11 @@ from ipaddress import (
|
||||
import flask
|
||||
import werkzeug
|
||||
|
||||
from searx.extended_types import SXNG_Request
|
||||
from searx import valkeydb
|
||||
from searx.valkeylib import incr_sliding_window, drop_counter
|
||||
|
||||
from . import link_token
|
||||
from . import config
|
||||
from . import valkeydb
|
||||
from ._helpers import (
|
||||
too_many_requests,
|
||||
logger,
|
||||
@@ -92,12 +91,12 @@ SUSPICIOUS_IP_MAX = 3
|
||||
|
||||
def filter_request(
|
||||
network: IPv4Network | IPv6Network,
|
||||
request: SXNG_Request,
|
||||
request: flask.Request,
|
||||
cfg: config.Config,
|
||||
) -> werkzeug.Response | None:
|
||||
|
||||
# pylint: disable=too-many-return-statements
|
||||
valkey_client = valkeydb.client()
|
||||
valkey_client = valkeydb.get_valkey_client()
|
||||
|
||||
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
|
||||
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
|
||||
|
||||
@@ -4,21 +4,22 @@
|
||||
Method ``ip_lists``
|
||||
-------------------
|
||||
|
||||
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
|
||||
:py:obj:`pass-lists <pass_ip>`.
|
||||
The ``ip_lists`` method implements :py:obj:`block-list <block_ip>` and
|
||||
:py:obj:`pass-list <pass_ip>`.
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[botdetection.ip_lists]
|
||||
|
||||
pass_ip = [
|
||||
'167.235.158.251', # IPv4 of check.searx.space
|
||||
'192.168.0.0/16', # IPv4 private network
|
||||
'fe80::/10' # IPv6 linklocal
|
||||
'167.235.158.251', # IPv4 of check.searx.space
|
||||
'192.168.0.0/16', # IPv4 private network
|
||||
'fe80::/10', # IPv6 linklocal
|
||||
]
|
||||
|
||||
block_ip = [
|
||||
'93.184.216.34', # IPv4 of example.org
|
||||
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
||||
'93.184.216.34', # IPv4 of example.org
|
||||
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
||||
]
|
||||
|
||||
"""
|
||||
@@ -72,7 +73,6 @@ def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bo
|
||||
def ip_is_subnet_of_member_in_list(
|
||||
real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
|
||||
) -> Tuple[bool, str]:
|
||||
|
||||
for net in cfg.get(list_name, default=[]):
|
||||
try:
|
||||
net = ip_network(net, strict=False)
|
||||
|
||||
@@ -43,17 +43,18 @@ from ipaddress import (
|
||||
|
||||
import string
|
||||
import random
|
||||
import flask
|
||||
|
||||
from searx import logger
|
||||
from searx import valkeydb
|
||||
from searx.valkeylib import secret_hash
|
||||
from searx.extended_types import SXNG_Request
|
||||
|
||||
from ._helpers import (
|
||||
get_network,
|
||||
get_real_ip,
|
||||
logger,
|
||||
)
|
||||
|
||||
from . import config
|
||||
from . import valkeydb
|
||||
|
||||
TOKEN_LIVE_TIME = 600
|
||||
"""Lifetime (sec) of limiter's CSS token."""
|
||||
|
||||
@@ -69,17 +70,14 @@ TOKEN_KEY = 'SearXNG_limiter.token'
|
||||
logger = logger.getChild('botdetection.link_token')
|
||||
|
||||
|
||||
def is_suspicious(network: IPv4Network | IPv6Network, request: SXNG_Request, renew: bool = False):
|
||||
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
|
||||
"""Checks whether a valid ping exists for this (client) network, if not
|
||||
this request is rated as *suspicious*. If a valid ping exists and argument
|
||||
``renew`` is ``True`` the expire time of this ping is reset to
|
||||
:py:obj:`PING_LIVE_TIME`.
|
||||
|
||||
"""
|
||||
valkey_client = valkeydb.client()
|
||||
if not valkey_client:
|
||||
return False
|
||||
|
||||
valkey_client = valkeydb.get_valkey_client()
|
||||
ping_key = get_ping_key(network, request)
|
||||
if not valkey_client.get(ping_key):
|
||||
logger.info("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
|
||||
@@ -92,28 +90,29 @@ def is_suspicious(network: IPv4Network | IPv6Network, request: SXNG_Request, ren
|
||||
return False
|
||||
|
||||
|
||||
def ping(request: SXNG_Request, token: str):
|
||||
def ping(request: flask.Request, token: str):
|
||||
"""This function is called by a request to URL ``/client<token>.css``. If
|
||||
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
|
||||
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
|
||||
|
||||
"""
|
||||
from . import valkey_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import
|
||||
valkey_client = valkeydb.get_valkey_client()
|
||||
cfg = config.get_global_cfg()
|
||||
|
||||
if not valkey_client:
|
||||
return
|
||||
if not token_is_valid(token):
|
||||
return
|
||||
|
||||
real_ip = ip_address(get_real_ip(request))
|
||||
real_ip = ip_address(request.remote_addr) # type: ignore
|
||||
network = get_network(real_ip, cfg)
|
||||
|
||||
ping_key = get_ping_key(network, request)
|
||||
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
|
||||
logger.debug(
|
||||
"store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip.compressed, ping_key
|
||||
)
|
||||
valkey_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||
|
||||
|
||||
def get_ping_key(network: IPv4Network | IPv6Network, request: SXNG_Request) -> str:
|
||||
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
|
||||
"""Generates a hashed key that fits (more or less) to a *WEB-browser
|
||||
session* in a network."""
|
||||
return (
|
||||
@@ -134,20 +133,23 @@ def token_is_valid(token) -> bool:
|
||||
|
||||
def get_token() -> str:
|
||||
"""Returns current token. If there is no currently active token a new token
|
||||
is generated randomly and stored in the valkey DB.
|
||||
is generated randomly and stored in the Valkey DB. Without a
|
||||
database connection, string "12345678" is returned.
|
||||
|
||||
- :py:obj:`TOKEN_LIVE_TIME`
|
||||
- :py:obj:`TOKEN_KEY`
|
||||
|
||||
"""
|
||||
valkey_client = valkeydb.client()
|
||||
if not valkey_client:
|
||||
try:
|
||||
valkey_client = valkeydb.get_valkey_client()
|
||||
except ValueError:
|
||||
# This function is also called when limiter is inactive / no valkey DB
|
||||
# (see render function in webapp.py)
|
||||
return '12345678'
|
||||
|
||||
token = valkey_client.get(TOKEN_KEY)
|
||||
if token:
|
||||
token = token.decode('UTF-8')
|
||||
token = token.decode('UTF-8') # type: ignore
|
||||
else:
|
||||
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
|
||||
valkey_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
|
||||
|
||||
175
searx/botdetection/trusted_proxies.py
Normal file
175
searx/botdetection/trusted_proxies.py
Normal file
@@ -0,0 +1,175 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Implementation of a middleware to determine the real IP of an HTTP request
|
||||
(:py:obj:`flask.request.remote_addr`) behind a proxy chain."""
|
||||
# pylint: disable=too-many-branches
|
||||
|
||||
from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
from collections import abc
|
||||
from ipaddress import IPv4Address, IPv6Address, ip_address, ip_network, IPv4Network, IPv6Network
|
||||
from werkzeug.http import parse_list_header
|
||||
|
||||
from . import config
|
||||
from ._helpers import log_error_only_once, logger
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from _typeshed.wsgi import StartResponse
|
||||
from _typeshed.wsgi import WSGIApplication
|
||||
from _typeshed.wsgi import WSGIEnvironment
|
||||
|
||||
|
||||
class ProxyFix:
    """A middleware like the ProxyFix_ class, where the ``x_for`` argument is
    replaced by a method that determines the number of trusted proxies via
    the ``botdetection.trusted_proxies`` setting.

    .. sidebar:: :py:obj:`flask.Request.remote_addr`

       SearXNG uses Werkzeug's ProxyFix_ (with its default ``x_for=1``).

    The remote IP (:py:obj:`flask.Request.remote_addr`) of the request is taken
    from (first match):

    - X-Forwarded-For_: If the header is set, the first untrusted IP that comes
      before the IPs that are still part of the ``botdetection.trusted_proxies``
      is used.  The header is ignored if it contains an invalid IP or if
      ``botdetection.trusted_proxies`` is empty.

    - `X-Real-IP <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__:
      If X-Forwarded-For_ is not set (or ignored), ``X-Real-IP`` is used
      (``botdetection.trusted_proxies`` is ignored).

    If none of the headers is set, the REMOTE_ADDR_ from the WSGI layer is used.
    If (for whatever reason) no IP can be determined, an error message is
    logged and ``100::`` is used instead (:rfc:`6666`).

    IPv4-mapped IPv6 addresses (``::ffff:a.b.c.d``) are converted to their IPv4
    form.  The (validated) original ``REMOTE_ADDR`` is kept in the WSGI
    environment under the key ``botdetection.trusted_proxies.orig``.

    .. _ProxyFix:
       https://werkzeug.palletsprojects.com/middleware/proxy_fix/

    .. _X-Forwarded-For:
       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For

    .. _REMOTE_ADDR:
       https://wsgi.readthedocs.io/en/latest/proposals-2.0.html#making-some-keys-required

    """

    def __init__(self, wsgi_app: WSGIApplication) -> None:
        self.wsgi_app = wsgi_app

    @staticmethod
    def _parse_ip(value: str) -> IPv4Address | IPv6Address:
        """Parse ``value`` into an IP address object; an IPv4-mapped IPv6
        address is returned as the IPv4 address it wraps.

        :raises ValueError: if ``value`` is not a valid IP address.
        """
        addr = ip_address(value)
        if addr.version == 6 and addr.ipv4_mapped:
            return addr.ipv4_mapped
        return addr

    def trusted_proxies(self) -> list[IPv4Network | IPv6Network]:
        """Return the networks listed in ``botdetection.trusted_proxies`` of
        the global botdetection config (empty list if the setting is missing).

        :raises ValueError: if the global config is not initialized or if an
            entry of the setting is not a valid IP network.
        """
        cfg = config.get_global_cfg()
        proxy_list: list[str] = cfg.get("botdetection.trusted_proxies", default=[])
        return [ip_network(net, strict=False) for net in proxy_list]

    def trusted_remote_addr(
        self,
        x_forwarded_for: list[IPv4Address | IPv6Address],
        trusted_proxies: list[IPv4Network | IPv6Network],
    ) -> str:
        """Return the client address from the ``x_forwarded_for`` chain.

        The chain is walked from right (closest proxy) to left; the first
        address that is not a member of one of the ``trusted_proxies`` is the
        client address.  If every address is trusted, the left-most one is
        returned.  ``x_forwarded_for`` must not be empty.
        """
        # always rtl
        for addr in reversed(x_forwarded_for):
            trust: bool = False

            for net in trusted_proxies:
                if addr.version == net.version and addr in net:
                    logger.debug("trust proxy %s (member of %s)", addr, net)
                    trust = True
                    break

            # client address
            if not trust:
                return addr.compressed

        # fallback to first address
        return x_forwarded_for[0].compressed

    def __call__(self, environ: WSGIEnvironment, start_response: StartResponse) -> abc.Iterable[bytes]:
        """Set ``REMOTE_ADDR`` in the WSGI ``environ`` to the determined client
        IP and pass the request on to the wrapped WSGI application."""
        # pylint: disable=too-many-statements

        trusted_proxies = self.trusted_proxies()

        # We do not rely on the REMOTE_ADDR from the WSGI environment / the
        # variable is first removed from the WSGI environment and explicitly set
        # in this function!
        #
        # A server may not provide REMOTE_ADDR at all; a missing key is handled
        # like an empty value (see the fallbacks below) instead of raising a
        # KeyError.

        orig_remote_addr: str | None = environ.pop("REMOTE_ADDR", None)

        # Validate the IPs involved in this game and delete all invalid ones
        # from the WSGI environment.

        if orig_remote_addr:
            try:
                orig_remote_addr = self._parse_ip(orig_remote_addr).compressed
            except ValueError as exc:
                logger.error("REMOTE_ADDR: %s / discard REMOTE_ADDR from WSGI environment", exc)
                orig_remote_addr = None

        x_real_ip: str | None = environ.get("HTTP_X_REAL_IP")
        if x_real_ip:
            try:
                x_real_ip = self._parse_ip(x_real_ip).compressed
            except ValueError as exc:
                logger.error("X-Real-IP: %s / discard HTTP_X_REAL_IP from WSGI environment", exc)
                environ.pop("HTTP_X_REAL_IP")
                x_real_ip = None

        x_forwarded_for: list[IPv4Address | IPv6Address] = []
        if environ.get("HTTP_X_FORWARDED_FOR"):
            for x_for_ip in parse_list_header(str(environ.get("HTTP_X_FORWARDED_FOR"))):
                try:
                    addr = self._parse_ip(x_for_ip)
                except ValueError as exc:
                    # one invalid entry makes the whole chain untrustworthy
                    logger.error("X-Forwarded-For: %s / discard HTTP_X_FORWARDED_FOR from WSGI environment", exc)
                    environ.pop("HTTP_X_FORWARDED_FOR")
                    x_forwarded_for = []
                    break
                x_forwarded_for.append(addr)

        # log questionable WSGI environments

        if not x_forwarded_for and not x_real_ip:
            log_error_only_once("X-Forwarded-For nor X-Real-IP header is set!")

        if x_forwarded_for and not trusted_proxies:
            log_error_only_once("missing botdetection.trusted_proxies config")
            # without trusted_proxies, this variable is useless for determining
            # the real IP
            x_forwarded_for = []

        # securing the WSGI environment variables that are adjusted

        environ.update({"botdetection.trusted_proxies.orig": {"REMOTE_ADDR": orig_remote_addr}})

        # determine *the real IP*

        if x_forwarded_for:
            environ["REMOTE_ADDR"] = self.trusted_remote_addr(x_forwarded_for, trusted_proxies)

        elif x_real_ip:
            environ["REMOTE_ADDR"] = x_real_ip

        elif orig_remote_addr:
            environ["REMOTE_ADDR"] = orig_remote_addr

        else:
            logger.error("No remote IP could be determined, use black-hole address: 100::")
            environ["REMOTE_ADDR"] = "100::"

        # final sanity check of the value handed over to the application
        try:
            _ = ip_address(environ["REMOTE_ADDR"])
        except ValueError as exc:
            logger.error("REMOTE_ADDR: %s, use black-hole address: 100::", exc)
            environ["REMOTE_ADDR"] = "100::"

        logger.debug("final REMOTE_ADDR is: %s", environ["REMOTE_ADDR"])
        return self.wsgi_app(environ, start_response)
|
||||
22
searx/botdetection/valkeydb.py
Normal file
22
searx/botdetection/valkeydb.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Providing a Valkey database for the botdetection methods."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import valkey
|
||||
|
||||
__all__ = ["set_valkey_client", "get_valkey_client"]
|
||||
|
||||
CLIENT: valkey.Valkey | None = None
"""Global Valkey DB connection (Valkey client object)."""


def set_valkey_client(valkey_client: valkey.Valkey):
    """Register ``valkey_client`` as the global Valkey client of the
    botdetection."""
    # the module-level name is the single place where the client is stored
    global CLIENT  # pylint: disable=global-statement
    CLIENT = valkey_client


def get_valkey_client() -> valkey.Valkey:
    """Return the global Valkey client of the botdetection.

    :raises ValueError: if no client has been registered by
        :py:obj:`set_valkey_client` yet.
    """
    client = CLIENT
    if client is not None:
        return client
    raise ValueError("No connection to the Valkey database has been established.")
|
||||
Reference in New Issue
Block a user