mirror of https://github.com/searxng/searxng.git
Merge pull request #2484 from return42/limiter-ip_lists
[mod] limiter: blocklist and passlist (ip_lists)
This commit is contained in:
commit
b295b497f7
|
@ -15,6 +15,9 @@ Bot Detection
|
||||||
.. automodule:: searx.botdetection.limiter
|
.. automodule:: searx.botdetection.limiter
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection.ip_lists
|
||||||
|
:members:
|
||||||
|
|
||||||
|
|
||||||
Rate limit
|
Rate limit
|
||||||
==========
|
==========
|
||||||
|
|
|
@ -6,8 +6,8 @@ from __future__ import annotations
|
||||||
from ipaddress import (
|
from ipaddress import (
|
||||||
IPv4Network,
|
IPv4Network,
|
||||||
IPv6Network,
|
IPv6Network,
|
||||||
|
IPv4Address,
|
||||||
IPv6Address,
|
IPv6Address,
|
||||||
ip_address,
|
|
||||||
ip_network,
|
ip_network,
|
||||||
)
|
)
|
||||||
import flask
|
import flask
|
||||||
|
@ -46,11 +46,10 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
|
||||||
return flask.make_response(('Too Many Requests', 429))
|
return flask.make_response(('Too Many Requests', 429))
|
||||||
|
|
||||||
|
|
||||||
def get_network(real_ip: str, cfg: config.Config) -> IPv4Network | IPv6Network:
|
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
|
||||||
"""Returns the (client) network of whether the real_ip is part of."""
|
"""Returns the (client) network of whether the real_ip is part of."""
|
||||||
|
|
||||||
ip = ip_address(real_ip)
|
if real_ip.version == 6:
|
||||||
if isinstance(ip, IPv6Address):
|
|
||||||
prefix = cfg['real_ip.ipv6_prefix']
|
prefix = cfg['real_ip.ipv6_prefix']
|
||||||
else:
|
else:
|
||||||
prefix = cfg['real_ip.ipv4_prefix']
|
prefix = cfg['real_ip.ipv4_prefix']
|
||||||
|
@ -99,7 +98,7 @@ def get_real_ip(request: flask.Request) -> str:
|
||||||
from .limiter import get_cfg # pylint: disable=import-outside-toplevel, cyclic-import
|
from .limiter import get_cfg # pylint: disable=import-outside-toplevel, cyclic-import
|
||||||
|
|
||||||
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
|
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
|
||||||
x_for: int = get_cfg()['real_ip.x_for']
|
x_for: int = get_cfg()['real_ip.x_for'] # type: ignore
|
||||||
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
|
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
|
||||||
|
|
||||||
if not real_ip:
|
if not real_ip:
|
||||||
|
|
|
@ -49,14 +49,16 @@ import werkzeug
|
||||||
from searx.tools import config
|
from searx.tools import config
|
||||||
|
|
||||||
from searx import redisdb
|
from searx import redisdb
|
||||||
from searx import logger
|
|
||||||
from searx.redislib import incr_sliding_window, drop_counter
|
from searx.redislib import incr_sliding_window, drop_counter
|
||||||
|
|
||||||
from . import link_token
|
from . import link_token
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import (
|
||||||
|
too_many_requests,
|
||||||
|
logger,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('botdetection.ip_limit')
|
logger = logger.getChild('ip_limit')
|
||||||
|
|
||||||
BURST_WINDOW = 20
|
BURST_WINDOW = 20
|
||||||
"""Time (sec) before sliding window for *burst* requests expires."""
|
"""Time (sec) before sliding window for *burst* requests expires."""
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
""".. _botdetection.ip_lists:
|
||||||
|
|
||||||
|
Method ``ip_lists``
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
|
||||||
|
:py:obj:`pass-lists <pass_ip>`.
|
||||||
|
|
||||||
|
.. code:: toml
|
||||||
|
|
||||||
|
[botdetection.ip_lists]
|
||||||
|
|
||||||
|
pass_ip = [
|
||||||
|
'140.238.172.132', # IPv4 of check.searx.space
|
||||||
|
'192.168.0.0/16', # IPv4 private network
|
||||||
|
'fe80::/10' # IPv6 linklocal
|
||||||
|
]
|
||||||
|
block_ip = [
|
||||||
|
'93.184.216.34', # IPv4 of example.org
|
||||||
|
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
||||||
|
]
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import Tuple
|
||||||
|
from ipaddress import (
|
||||||
|
ip_network,
|
||||||
|
IPv4Address,
|
||||||
|
IPv6Address,
|
||||||
|
)
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import logger
|
||||||
|
|
||||||
|
# Child logger for this module; named after the ``ip_lists`` method so that
# its records are not attributed to the ``ip_limit`` method.
logger = logger.getChild('ip_lists')
|
||||||
|
|
||||||
|
SEARXNG_ORG = [
|
||||||
|
# https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
|
||||||
|
'140.238.172.132', # IPv4 check.searx.space
|
||||||
|
'2603:c022:0:4900::/56', # IPv6 check.searx.space
|
||||||
|
]
|
||||||
|
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
|
||||||
|
|
||||||
|
|
||||||
|
def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
    """Check whether ``real_ip`` is covered by a pass-list entry.

    If ``botdetection.ip_lists.pass_searxng_org`` is enabled (the default when
    the option is missing), the hard-coded ``SEARXNG_ORG`` networks are
    checked first.  Otherwise, or if none of them match, the networks
    configured in ``botdetection.ip_lists.pass_ip`` are checked.

    Returns a tuple ``(match, msg)`` where ``msg`` describes the result.
    """
    org_enabled = cfg.get('botdetection.ip_lists.pass_searxng_org', default=True)
    if org_enabled:
        for entry in SEARXNG_ORG:
            org_net = ip_network(entry, strict=False)
            # only compare addresses and networks of the same IP version
            if real_ip.version != org_net.version:
                continue
            if real_ip in org_net:
                return True, f"IP matches {org_net.compressed} in SEARXNG_ORG list."

    return ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.pass_ip', cfg)
|
||||||
|
|
||||||
|
|
||||||
|
def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
    """Check whether ``real_ip`` is covered by an entry of the
    ``botdetection.ip_lists.block_ip`` list.

    Returns a tuple ``(match, msg)``.  On a match, a hint on how to get the
    IP removed from the list is appended to ``msg``.
    """
    matched, reason = ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.block_ip', cfg)
    if not matched:
        return matched, reason

    return matched, reason + " To remove IP from list, please contact the maintainer of the service."
|
||||||
|
|
||||||
|
|
||||||
|
def ip_is_subnet_of_member_in_list(
    real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
) -> Tuple[bool, str]:
    """Check whether ``real_ip`` lies within one of the networks of a config list.

    :param real_ip: the address to look up.
    :param list_name: name of the configuration option holding the list of
        IPs / networks (e.g. ``botdetection.ip_lists.pass_ip``); a missing
        option is treated as an empty list.
    :param cfg: configuration the list is read from.
    :returns: tuple ``(match, msg)``; ``match`` is ``True`` for the first
        list entry that contains ``real_ip``, ``msg`` describes the result.

    Entries that can't be parsed as an IP or network are logged as errors and
    ignored.
    """
    for item in cfg.get(list_name, default=[]):
        try:
            # strict=False: accept entries with host bits set (e.g. 192.168.0.1/16)
            net = ip_network(item, strict=False)
        except ValueError:
            logger.error("invalid IP %s in %s", item, list_name)
            continue
        # only compare addresses and networks of the same IP version
        if real_ip.version == net.version and real_ip in net:
            return True, f"IP matches {net.compressed} in {list_name}."

    return False, f"IP is not a member of an item in the {list_name} list"
|
|
@ -40,6 +40,7 @@ and set the redis-url connection. Check the value, it depends on your redis DB
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from ipaddress import ip_address
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
|
@ -53,6 +54,7 @@ from . import (
|
||||||
http_connection,
|
http_connection,
|
||||||
http_user_agent,
|
http_user_agent,
|
||||||
ip_limit,
|
ip_limit,
|
||||||
|
ip_lists,
|
||||||
)
|
)
|
||||||
|
|
||||||
from ._helpers import (
|
from ._helpers import (
|
||||||
|
@ -84,16 +86,41 @@ def get_cfg() -> config.Config:
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> werkzeug.Response | None:
|
def filter_request(request: flask.Request) -> werkzeug.Response | None:
|
||||||
|
# pylint: disable=too-many-return-statements
|
||||||
|
|
||||||
cfg = get_cfg()
|
cfg = get_cfg()
|
||||||
real_ip = get_real_ip(request)
|
real_ip = ip_address(get_real_ip(request))
|
||||||
network = get_network(real_ip, cfg)
|
network = get_network(real_ip, cfg)
|
||||||
if network.is_link_local:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if request.path == '/healthz':
|
if request.path == '/healthz':
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# link-local
|
||||||
|
|
||||||
|
if network.is_link_local:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# block- & pass- lists
|
||||||
|
#
|
||||||
|
# 1. The IP of the request is first checked against the pass-list; if the IP
|
||||||
|
# matches an entry in the list, the request is not blocked.
|
||||||
|
# 2. If no matching entry is found in the pass-list, then a check is made against
|
||||||
|
# the block list; if the IP matches an entry in the list, the request is
|
||||||
|
# blocked.
|
||||||
|
# 3. If the IP is not in either list, the request is not blocked.
|
||||||
|
|
||||||
|
match, msg = ip_lists.pass_ip(real_ip, cfg)
|
||||||
|
if match:
|
||||||
|
logger.warning("PASS %s: matched PASSLIST - %s", network.compressed, msg)
|
||||||
|
return None
|
||||||
|
|
||||||
|
match, msg = ip_lists.block_ip(real_ip, cfg)
|
||||||
|
if match:
|
||||||
|
logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
|
||||||
|
return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))
|
||||||
|
|
||||||
|
# methods applied on /
|
||||||
|
|
||||||
for func in [
|
for func in [
|
||||||
http_user_agent,
|
http_user_agent,
|
||||||
]:
|
]:
|
||||||
|
@ -101,6 +128,8 @@ def filter_request(request: flask.Request) -> werkzeug.Response | None:
|
||||||
if val is not None:
|
if val is not None:
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
# methods applied on /search
|
||||||
|
|
||||||
if request.path == '/search':
|
if request.path == '/search':
|
||||||
|
|
||||||
for func in [
|
for func in [
|
||||||
|
|
|
@ -16,7 +16,25 @@ ipv6_prefix = 48
|
||||||
# (networks) are not monitored by the ip_limit
|
# (networks) are not monitored by the ip_limit
|
||||||
filter_link_local = false
|
filter_link_local = false
|
||||||
|
|
||||||
# acrivate link_token method in the ip_limit method
|
# activate link_token method in the ip_limit method
|
||||||
link_token = false
|
link_token = false
|
||||||
|
|
||||||
|
[botdetection.ip_lists]
|
||||||
|
|
||||||
|
# In the limiter, the ip_lists method has priority over all other methods -> if
|
||||||
|
# an IP is in the pass_ip list, it has unrestricted access and it is also not
|
||||||
|
# checked if e.g. the "user agent" suggests a bot (e.g. curl).
|
||||||
|
|
||||||
|
block_ip = [
|
||||||
|
# '93.184.216.34', # IPv4 of example.org
|
||||||
|
# '257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
||||||
|
]
|
||||||
|
|
||||||
|
pass_ip = [
|
||||||
|
# '192.168.0.0/16', # IPv4 private network
|
||||||
|
# 'fe80::/10' # IPv6 linklocal / wins over botdetection.ip_limit.filter_link_local
|
||||||
|
]
|
||||||
|
|
||||||
|
# Activate passlist of (hardcoded) IPs from the SearXNG organization,
|
||||||
|
# e.g. `check.searx.space`.
|
||||||
|
pass_searxng_org = true
|
|
@ -39,6 +39,7 @@ from __future__ import annotations
|
||||||
from ipaddress import (
|
from ipaddress import (
|
||||||
IPv4Network,
|
IPv4Network,
|
||||||
IPv6Network,
|
IPv6Network,
|
||||||
|
ip_address,
|
||||||
)
|
)
|
||||||
|
|
||||||
import string
|
import string
|
||||||
|
@ -107,7 +108,7 @@ def ping(request: flask.Request, token: str):
|
||||||
return
|
return
|
||||||
|
|
||||||
cfg = limiter.get_cfg()
|
cfg = limiter.get_cfg()
|
||||||
real_ip = get_real_ip(request)
|
real_ip = ip_address(get_real_ip(request))
|
||||||
network = get_network(real_ip, cfg)
|
network = get_network(real_ip, cfg)
|
||||||
|
|
||||||
ping_key = get_ping_key(network, request)
|
ping_key = get_ping_key(network, request)
|
||||||
|
|
|
@ -8,6 +8,7 @@ structured dictionaries. The configuration schema is defined in a dictionary
|
||||||
structure and the configuration data is given in a dictionary structure.
|
structure and the configuration data is given in a dictionary structure.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
import typing
|
import typing
|
||||||
|
@ -97,7 +98,7 @@ class Config:
|
||||||
self.deprecated = deprecated
|
self.deprecated = deprecated
|
||||||
self.cfg = copy.deepcopy(cfg_schema)
|
self.cfg = copy.deepcopy(cfg_schema)
|
||||||
|
|
||||||
def __getitem__(self, key: str):
|
def __getitem__(self, key: str) -> Any:
|
||||||
return self.get(key)
|
return self.get(key)
|
||||||
|
|
||||||
def validate(self, cfg: dict):
|
def validate(self, cfg: dict):
|
||||||
|
@ -115,7 +116,7 @@ class Config:
|
||||||
"""Returns default value of field ``name`` in ``self.cfg_schema``."""
|
"""Returns default value of field ``name`` in ``self.cfg_schema``."""
|
||||||
return value(name, self.cfg_schema)
|
return value(name, self.cfg_schema)
|
||||||
|
|
||||||
def get(self, name: str, default=UNSET, replace=True):
|
def get(self, name: str, default: Any = UNSET, replace: bool = True) -> Any:
|
||||||
"""Returns the value to which ``name`` points in the configuration.
|
"""Returns the value to which ``name`` points in the configuration.
|
||||||
|
|
||||||
If there is no such ``name`` in the config and the ``default`` is
|
If there is no such ``name`` in the config and the ``default`` is
|
||||||
|
|
Loading…
Reference in New Issue