[enh] Add multiple outgoing proxies

credits go to @bauruine see https://github.com/searx/searx/pull/1958
This commit is contained in:
Alexandre Flament 2020-11-16 12:44:07 +01:00
parent 2fc3b17c85
commit 3786920df9
7 changed files with 172 additions and 36 deletions

View File

@ -36,18 +36,26 @@ Global Settings
image_proxy : False # proxying image results through searx image_proxy : False # proxying image results through searx
default_locale : "" # default interface locale default_locale : "" # default interface locale
# uncomment below section if you want to use a proxy outgoing: # communication with search engines
request_timeout : 2.0 # default timeout in seconds, can be overridden by engine
# max_request_timeout: 10.0 # the maximum timeout in seconds
useragent_suffix : "" # suffix of searx_useragent, could contain information like an email address to the administrator
pool_connections : 100 # Number of different hosts
pool_maxsize : 10 # Number of simultaneous requests by host
#outgoing_proxies : #proxies:
# http : http://127.0.0.1:8080 # http:
# https: http://127.0.0.1:8080 # - http://proxy1:8080
# - http://proxy2:8080
# https:
# - http://proxy1:8080
# - http://proxy2:8080
# - socks5://user:password@proxy3:1080
# - socks5h://user:password@proxy4:1080
# uncomment below section only if you have more than one network interface #source_ips:
# which can be the source of outgoing search requests # - 1.1.1.1
# - 1.1.1.2
#source_ips:
# - 1.1.1.1
# - 1.1.1.2
locales: locales:
en : English en : English
@ -105,15 +113,16 @@ Global Settings
code, like ``fr``, ``en``, ``de``. code, like ``fr``, ``en``, ``de``.
.. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies .. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies
.. _PR SOCKS support: https://github.com/kennethreitz/requests/pull/478 .. _PySocks: https://pypi.org/project/PySocks/
``outgoing_proxies`` : ``proxies`` :
Define a proxy you wish to use, see `requests proxies`_. SOCKS proxies are Define one or more proxies you wish to use, see `requests proxies`_.
not supported / see `PR SOCKS support`. If there is more than one proxy for one protocol (http, https),
requests to the engines are distributed in a round-robin fashion.
``source_ips`` : ``source_ips`` :
If you use multiple network interfaces, define from which IP the requests must If you use multiple network interfaces, define from which IP the requests must
be made. be made. This parameter is ignored when ``proxies`` is set.
``locales`` : ``locales`` :
Locales codes and their names. Available translations of searx interface. Locales codes and their names. Available translations of searx interface.
@ -139,6 +148,15 @@ Engine settings
api_key : 'apikey' api_key : 'apikey'
disabled : True disabled : True
language : en_US language : en_US
#proxies:
# http:
# - http://proxy1:8080
# - http://proxy2:8080
# https:
# - http://proxy1:8080
# - http://proxy2:8080
# - socks5://user:password@proxy3:1080
# - socks5h://user:password@proxy4:1080
``name`` : ``name`` :
Name that will be used across searx to define this engine. In settings, on Name that will be used across searx to define this engine. In settings, on

View File

@ -25,7 +25,7 @@ from operator import itemgetter
from searx import settings from searx import settings
from searx import logger from searx import logger
from searx.data import ENGINES_LANGUAGES from searx.data import ENGINES_LANGUAGES
from searx.poolrequests import get from searx.poolrequests import get, get_proxy_cycles
from searx.utils import load_module, match_language, get_engine_from_settings from searx.utils import load_module, match_language, get_engine_from_settings
@ -79,16 +79,18 @@ def load_engine(engine_data):
logger.exception('Cannot load engine "{}"'.format(engine_module)) logger.exception('Cannot load engine "{}"'.format(engine_module))
return None return None
for param_name in engine_data: for param_name, param_value in engine_data.items():
if param_name == 'engine': if param_name == 'engine':
continue pass
if param_name == 'categories': elif param_name == 'categories':
if engine_data['categories'] == 'none': if param_value == 'none':
engine.categories = [] engine.categories = []
else: else:
engine.categories = list(map(str.strip, engine_data['categories'].split(','))) engine.categories = list(map(str.strip, param_value.split(',')))
continue elif param_name == 'proxies':
setattr(engine, param_name, engine_data[param_name]) engine.proxies = get_proxy_cycles(param_value)
else:
setattr(engine, param_name, param_value)
for arg_name, arg_value in engine_default_args.items(): for arg_name, arg_value in engine_default_args.items():
if not hasattr(engine, arg_name): if not hasattr(engine, arg_name):

View File

@ -111,6 +111,32 @@ def get_time_for_thread():
return threadLocal.total_time return threadLocal.total_time
def get_proxy_cycles(proxy_settings):
    """Build one endless round-robin iterator per protocol from the proxy settings.

    ``proxy_settings`` maps a protocol name (for example ``http`` or ``https``)
    to either a single proxy URL or a list of proxy URLs.  A single string is
    accepted for backwards compatibility with older ``settings.yml`` files and
    is treated as a one-element list.

    The mapping passed in is left untouched: a new dict is returned, so the
    loaded settings keep their plain values and the same configuration can be
    converted more than once without the resulting cycles sharing state.

    Returns ``None`` when ``proxy_settings`` is empty or not set, otherwise a
    dict mapping each protocol to an ``itertools.cycle`` over its proxies.
    """
    if not proxy_settings:
        return None
    proxy_cycles = {}
    for protocol, proxy in proxy_settings.items():
        # Backwards compatibility for single proxy in settings.yml
        proxies = [proxy] if isinstance(proxy, str) else proxy
        proxy_cycles[protocol] = cycle(proxies)
    return proxy_cycles
# Proxy cycles used for requests that do not bring their own proxies;
# built once, when this module is imported, from the ``outgoing.proxies`` setting.
GLOBAL_PROXY_CYCLES = get_proxy_cycles(settings['outgoing'].get('proxies'))
def get_proxies(proxy_cycles):
    """Pick the next proxy of every protocol.

    ``proxy_cycles`` maps a protocol name to an iterator over proxy URLs.
    Each call advances every iterator by one step and returns a dict mapping
    protocol to the selected proxy URL.  Returns ``None`` when
    ``proxy_cycles`` is empty or ``None``.
    """
    if not proxy_cycles:
        return None
    selected = {}
    for scheme, rotation in proxy_cycles.items():
        selected[scheme] = next(rotation)
    return selected
def get_global_proxies():
    """Return the next proxy per protocol taken from ``GLOBAL_PROXY_CYCLES``."""
    proxies = get_proxies(GLOBAL_PROXY_CYCLES)
    return proxies
def request(method, url, **kwargs): def request(method, url, **kwargs):
"""same as requests/requests/api.py request(...)""" """same as requests/requests/api.py request(...)"""
time_before_request = time() time_before_request = time()
@ -119,8 +145,8 @@ def request(method, url, **kwargs):
session = SessionSinglePool() session = SessionSinglePool()
# proxies # proxies
if kwargs.get('proxies') is None: if not kwargs.get('proxies'):
kwargs['proxies'] = settings['outgoing'].get('proxies') kwargs['proxies'] = get_global_proxies()
# timeout # timeout
if 'timeout' in kwargs: if 'timeout' in kwargs:

View File

@ -119,7 +119,7 @@ def send_http_request(engine, request_params):
# setting engine based proxies # setting engine based proxies
if hasattr(engine, 'proxies'): if hasattr(engine, 'proxies'):
request_args['proxies'] = engine.proxies request_args['proxies'] = requests_lib.get_proxies(engine.proxies)
# specific type of request (GET or POST) # specific type of request (GET or POST)
if request_params['method'] == 'GET': if request_params['method'] == 'GET':

View File

@ -63,13 +63,15 @@ outgoing: # communication with search engines
pool_connections : 100 # Number of different hosts pool_connections : 100 # Number of different hosts
pool_maxsize : 10 # Number of simultaneous requests by host pool_maxsize : 10 # Number of simultaneous requests by host
# uncomment below section if you want to use a proxy # uncomment below section if you want to use a proxy
# see http://docs.python-requests.org/en/latest/user/advanced/#proxies # see https://2.python-requests.org/en/latest/user/advanced/#proxies
# SOCKS proxies are also supported: see http://requests.readthedocs.io/en/master/user/advanced/#socks # SOCKS proxies are also supported: see https://2.python-requests.org/en/latest/user/advanced/#socks
# proxies : # proxies:
# http : socks5h://127.0.0.1:9050 # http:
# https: socks5h://127.0.0.1:9050 # - http://proxy1:8080
# using_tor_proxy : True # - http://proxy2:8080
# extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy # https:
# - http://proxy1:8080
# - http://proxy2:8080
# uncomment below section only if you have more than one network interface # uncomment below section only if you have more than one network interface
# which can be the source of outgoing search requests # which can be the source of outgoing search requests
# source_ips: # source_ips:

View File

@ -78,6 +78,7 @@ from searx.plugins import plugins
from searx.plugins.oa_doi_rewrite import get_doi_resolver from searx.plugins.oa_doi_rewrite import get_doi_resolver
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers from searx.answerers import answerers
from searx.poolrequests import get_global_proxies
# serve pages with HTTP/1.1 # serve pages with HTTP/1.1
@ -149,8 +150,6 @@ _category_names = (gettext('files'),
gettext('onions'), gettext('onions'),
gettext('science')) gettext('science'))
outgoing_proxies = settings['outgoing'].get('proxies') or None
_flask_babel_get_translations = flask_babel.get_translations _flask_babel_get_translations = flask_babel.get_translations
@ -905,7 +904,7 @@ def image_proxy():
stream=True, stream=True,
timeout=settings['outgoing']['request_timeout'], timeout=settings['outgoing']['request_timeout'],
headers=headers, headers=headers,
proxies=outgoing_proxies) proxies=get_global_proxies())
if resp.status_code == 304: if resp.status_code == 304:
return '', resp.status_code return '', resp.status_code

View File

@ -0,0 +1,89 @@
from unittest.mock import patch
from requests.models import Response
from searx.testing import SearxTestCase
import searx.poolrequests
from searx.poolrequests import get_proxy_cycles, get_proxies
# Proxy settings with two proxies per protocol, used by the round-robin tests.
CONFIG = {'http': ['http://localhost:9090', 'http://localhost:9092'],
'https': ['http://localhost:9091', 'http://localhost:9093']}
class TestProxy(SearxTestCase):
    """Tests for the proxy round-robin helpers of ``searx.poolrequests``."""

    @staticmethod
    def _fresh_config():
        # Hand every test its own copy of CONFIG so that no test depends on
        # whether an earlier one (or get_proxy_cycles itself) modified the
        # shared module-level dict.
        return {protocol: list(proxies) for protocol, proxies in CONFIG.items()}

    def test_noconfig(self):
        # falsy settings mean "no proxy configured"
        cycles = get_proxy_cycles(None)
        self.assertIsNone(cycles)

        cycles = get_proxy_cycles(False)
        self.assertIsNone(cycles)

    def test_oldconfig(self):
        # old style settings: one plain string per protocol
        config = {
            'http': 'http://localhost:9090',
            'https': 'http://localhost:9091',
        }
        cycles = get_proxy_cycles(config)
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')

    def test_one_proxy(self):
        # a one-element list always yields the same proxy
        config = {
            'http': ['http://localhost:9090'],
            'https': ['http://localhost:9091'],
        }
        cycles = get_proxy_cycles(config)
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')

    def test_multiple_proxies(self):
        # several proxies are handed out in order and wrap around
        cycles = get_proxy_cycles(self._fresh_config())
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['http']), 'http://localhost:9092')
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
        self.assertEqual(next(cycles['https']), 'http://localhost:9093')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')

    def test_getproxies_none(self):
        self.assertIsNone(get_proxies(None))

    def test_getproxies_config(self):
        # every call advances all protocols by one step
        cycles = get_proxy_cycles(self._fresh_config())
        self.assertEqual(get_proxies(cycles), {
            'http': 'http://localhost:9090',
            'https': 'http://localhost:9091'
        })
        self.assertEqual(get_proxies(cycles), {
            'http': 'http://localhost:9092',
            'https': 'http://localhost:9093'
        })

    @patch('searx.poolrequests.get_global_proxies')
    def test_request(self, mock_get_global_proxies):
        method = 'GET'
        url = 'http://localhost'
        custom_proxies = {
            'https': 'http://localhost:1080'
        }
        global_proxies = {
            'http': 'http://localhost:9092',
            'https': 'http://localhost:9093'
        }
        mock_get_global_proxies.return_value = global_proxies

        # check the global proxies usage
        with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
            searx.poolrequests.request(method, url)
        mock_method.assert_called_once_with(method=method, url=url, proxies=global_proxies)

        # check if the proxies parameter overrides the global proxies
        with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
            searx.poolrequests.request(method, url, proxies=custom_proxies)
        mock_method.assert_called_once_with(method=method, url=url, proxies=custom_proxies)