From 9f7244d6f16d139298dd7efe196e7d559b2e3ddb Mon Sep 17 00:00:00 2001 From: Grant Lanham Date: Sun, 25 Aug 2024 00:23:25 -0400 Subject: [PATCH 1/3] Implement "engine_exc_info" to allow removing exception logging in Checker --- searx/search/checker/impl.py | 5 ++++- searx/search/processors/abstract.py | 6 ++++-- searx/search/processors/online.py | 11 ++++++----- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index cf1f03449..15b925410 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -357,6 +357,7 @@ class Checker: # pylint: disable=missing-class-docstring def __init__(self, processor: EngineProcessor): self.processor = processor + self.processor.engine_exc_info = False # Remove exception information from errors to reduce verbosity self.tests = self.processor.get_tests() self.test_results = TestResults() @@ -418,8 +419,10 @@ class Checker: # pylint: disable=missing-class-docstring result_container_check.check_basic() return result_container_check - def run_test(self, test_name): + def run_test(self, test_name: str): test_parameters = self.tests[test_name] + # Not really a warning, but an info log will not appear + logger.warning('running test: %s', test_name) search_query_list = list(Checker.search_query_matrix_iterator(self.engineref_list, test_parameters['matrix'])) rct_list = [self.get_result_container_tests(test_name, search_query) for search_query in search_query_list] stop_test = False diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index f89302a92..7633d1f88 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -3,6 +3,7 @@ """ +from logging import Logger import threading from abc import abstractmethod, ABC from timeit import default_timer @@ -58,12 +59,13 @@ class SuspendedStatus: class EngineProcessor(ABC): """Base classes used for all types of request processors.""" - __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger' + __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger', 'engine_exc_info' def __init__(self, engine, engine_name: str): self.engine = engine self.engine_name = engine_name - self.logger = engines[engine_name].logger + self.logger: Logger = engines[engine_name].logger + self.engine_exc_info = True key = get_network(self.engine_name) key = id(key) if key else self.engine_name self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index acc3792f5..8e9307613 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -180,20 +180,21 @@ class OnlineProcessor(EngineProcessor): self.logger.exception( "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format( default_timer() - start_time, timeout_limit, e - ) + ), + exc_info=self.engine_exc_info, ) except SearxEngineCaptchaException as e: self.handle_exception(result_container, e, suspend=True) - self.logger.exception('CAPTCHA') + self.logger.exception('CAPTCHA', exc_info=self.engine_exc_info) except SearxEngineTooManyRequestsException as e: self.handle_exception(result_container, e, suspend=True) - self.logger.exception('Too many requests') + self.logger.exception('Too many requests', exc_info=self.engine_exc_info) except SearxEngineAccessDeniedException as e: self.handle_exception(result_container, e, suspend=True) - self.logger.exception('SearXNG is blocked') + self.logger.exception('SearXNG is blocked', exc_info=self.engine_exc_info) except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) - self.logger.exception('exception : {0}'.format(e)) + self.logger.exception('exception : {0}'.format(e), exc_info=self.engine_exc_info) def get_default_tests(self): tests = {} From 118a748fba8e80da0f330981be97c3e57411f376 Mon Sep 17 00:00:00 2001 From: Grant Lanham Date: Sun, 25 Aug 2024 01:03:28 -0400 Subject: [PATCH 2/3] Inject log_level into error_recorder format --- searx/metrics/error_recorder.py | 41 ++++++++++++++++++++++------- searx/search/processors/abstract.py | 6 +++-- searx/search/processors/online.py | 1 + 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index df25e8d41..3954226a4 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -6,6 +6,7 @@ import inspect from json import JSONDecodeError from urllib.parse import urlparse from httpx import HTTPError, HTTPStatusError +import logging from searx.exceptions import ( SearxXPathSyntaxException, SearxEngineXPathException, @@ -30,10 +31,20 @@ class ErrorContext: # pylint: disable=missing-class-docstring 'log_message', 'log_parameters', 'secondary', + 'log_level', ) def __init__( # pylint: disable=too-many-arguments - self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary + self, + filename, + function, + line_no, + code, + exception_classname, + log_message, + log_parameters, + secondary, + log_level=logging.WARN, ): self.filename = filename self.function = function @@ -43,6 +54,7 @@ class ErrorContext: # pylint: disable=missing-class-docstring self.log_message = log_message self.log_parameters = log_parameters self.secondary = secondary + self.log_level: int = log_level def __eq__(self, o) -> bool: # pylint: disable=invalid-name if not isinstance(o, ErrorContext): @@ -56,6 +68,7 @@ class ErrorContext: # pylint: disable=missing-class-docstring and self.log_message == o.log_message and self.log_parameters == o.log_parameters and self.secondary == o.secondary + and self.log_level == o.log_level ) def __hash__(self): @@ -69,11 +82,12 @@ class ErrorContext: # pylint: disable=missing-class-docstring self.log_message, self.log_parameters, self.secondary, + self.log_level, ) ) def __repr__(self): - return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".format( + return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}), {!r}, {!r}".format( self.filename, self.line_no, self.code, @@ -81,13 +95,14 @@ class ErrorContext: # pylint: disable=missing-class-docstring self.log_message, self.log_parameters, self.secondary, + self.log_level, ) def add_error_context(engine_name: str, error_context: ErrorContext) -> None: errors_for_engine = errors_per_engines.setdefault(engine_name, {}) errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1 - engines[engine_name].logger.warning('%s', str(error_context)) + engines[engine_name].logger.log(error_context.log_level, '%s', str(error_context)) def get_trace(traces): @@ -157,7 +172,9 @@ def get_exception_classname(exc: Exception) -> str: return exc_module + '.' + exc_name -def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext: +def get_error_context( + framerecords, exception_classname, log_message, log_parameters, secondary, log_level: int +) -> ErrorContext: searx_frame = get_trace(framerecords) filename = searx_frame.filename if filename.startswith(searx_parent_dir): @@ -166,30 +183,36 @@ def get_error_context(framerecords, exception_classname, log_message, log_parame line_no = searx_frame.lineno code = searx_frame.code_context[0].strip() del framerecords - return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary) + return ErrorContext( + filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary, log_level + ) -def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None: +def count_exception(engine_name: str, exc: Exception, secondary: bool = False, log_level=logging.WARN) -> None: if not settings['general']['enable_metrics']: return framerecords = inspect.trace() try: exception_classname = get_exception_classname(exc) log_parameters = get_messages(exc, framerecords[-1][1]) - error_context = get_error_context(framerecords, exception_classname, None, log_parameters, secondary) + error_context = get_error_context(framerecords, exception_classname, None, log_parameters, secondary, log_level) add_error_context(engine_name, error_context) finally: del framerecords def count_error( - engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False + engine_name: str, + log_message: str, + log_parameters: typing.Optional[typing.Tuple] = None, + secondary: bool = False, + log_level: int = logging.WARN, ) -> None: if not settings['general']['enable_metrics']: return framerecords = list(reversed(inspect.stack()[1:])) try: - error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary) + error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary, log_level) add_error_context(engine_name, error_context) finally: del framerecords diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index 7633d1f88..168f73b06 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -3,6 +3,7 @@ """ +import logging from logging import Logger import threading from abc import abstractmethod, ABC @@ -96,10 +97,11 @@ class EngineProcessor(ABC): result_container.add_unresponsive_engine(self.engine_name, error_message) # metrics counter_inc('engine', self.engine_name, 'search', 'count', 'error') + log_level = logging.WARN if self.engine_exc_info else logging.NOTSET if isinstance(exception_or_message, BaseException): - count_exception(self.engine_name, exception_or_message) + count_exception(self.engine_name, exception_or_message, log_level=log_level) else: - count_error(self.engine_name, exception_or_message) + count_error(self.engine_name, exception_or_message, log_level=log_level) # suspend the engine ? if suspend: suspended_time = None diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 8e9307613..84eea9fe8 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -126,6 +126,7 @@ class OnlineProcessor(EngineProcessor): self.engine_name, '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), (status_code, reason, hostname), + self.engine_exc_info, secondary=True, ) From 27aa9c4cb1f03c70807c8ed270d80a4d8ffc4e1f Mon Sep 17 00:00:00 2001 From: Grant Lanham Date: Sun, 25 Aug 2024 16:34:10 -0400 Subject: [PATCH 3/3] Updates to naming, pylint fixing --- searx/metrics/error_recorder.py | 2 +- searx/search/checker/impl.py | 4 ++-- searx/search/processors/abstract.py | 13 ++++++++----- searx/search/processors/online.py | 12 ++++++------ 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index 3954226a4..bb3d76adc 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -3,10 +3,10 @@ import typing import inspect +import logging from json import JSONDecodeError from urllib.parse import urlparse from httpx import HTTPError, HTTPStatusError -import logging from searx.exceptions import ( SearxXPathSyntaxException, SearxEngineXPathException, diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index 15b925410..659c9c50b 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -357,7 +357,7 @@ class Checker: # pylint: disable=missing-class-docstring def __init__(self, processor: EngineProcessor): self.processor = processor - self.processor.engine_exc_info = False # Remove exception information from errors to reduce verbosity + self.processor.log_engine_exc_info = False # Remove exception information from errors to reduce verbosity self.tests = self.processor.get_tests() self.test_results = TestResults() @@ -422,7 +422,7 @@ class Checker: # pylint: disable=missing-class-docstring def run_test(self, test_name: str): test_parameters = self.tests[test_name] # Not really a warning, but an info log will not appear - logger.warning('running test: %s', test_name) + logger.warning('---%s---', test_name) search_query_list = list(Checker.search_query_matrix_iterator(self.engineref_list, test_parameters['matrix'])) rct_list = [self.get_result_container_tests(test_name, search_query) for search_query in search_query_list] stop_test = False diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index 168f73b06..dd6e1c13d 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -60,13 +60,13 @@ class SuspendedStatus: class EngineProcessor(ABC): """Base classes used for all types of request processors.""" - __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger', 'engine_exc_info' + __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger', 'log_engine_exc_info' def __init__(self, engine, engine_name: str): self.engine = engine self.engine_name = engine_name self.logger: Logger = engines[engine_name].logger - self.engine_exc_info = True + self.log_engine_exc_info = True key = get_network(self.engine_name) key = id(key) if key else self.engine_name self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) @@ -85,6 +85,10 @@ class EngineProcessor(ABC): def has_initialize_function(self): return hasattr(self.engine, 'init') + @property + def metrics_log_level(self) -> int: + return logging.WARN if self.log_engine_exc_info else logging.NOTSET + def handle_exception(self, result_container, exception_or_message, suspend=False): # update result_container if isinstance(exception_or_message, BaseException): @@ -97,11 +101,10 @@ class EngineProcessor(ABC): result_container.add_unresponsive_engine(self.engine_name, error_message) # metrics counter_inc('engine', self.engine_name, 'search', 'count', 'error') - log_level = logging.WARN if self.engine_exc_info else logging.NOTSET if isinstance(exception_or_message, BaseException): - count_exception(self.engine_name, exception_or_message, log_level=log_level) + count_exception(self.engine_name, exception_or_message, log_level=self.metrics_log_level) else: - count_error(self.engine_name, exception_or_message, log_level=log_level) + count_error(self.engine_name, exception_or_message, log_level=self.metrics_log_level) # suspend the engine ? if suspend: suspended_time = None diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 84eea9fe8..cad63aae0 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -126,8 +126,8 @@ class OnlineProcessor(EngineProcessor): self.engine_name, '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), (status_code, reason, hostname), - self.engine_exc_info, secondary=True, + log_level=self.metrics_log_level, ) return response @@ -182,20 +182,20 @@ class OnlineProcessor(EngineProcessor): "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format( default_timer() - start_time, timeout_limit, e ), - exc_info=self.engine_exc_info, + exc_info=self.log_engine_exc_info, ) except SearxEngineCaptchaException as e: self.handle_exception(result_container, e, suspend=True) - self.logger.exception('CAPTCHA', exc_info=self.engine_exc_info) + self.logger.exception('CAPTCHA', exc_info=self.log_engine_exc_info) except SearxEngineTooManyRequestsException as e: self.handle_exception(result_container, e, suspend=True) - self.logger.exception('Too many requests', exc_info=self.engine_exc_info) + self.logger.exception('Too many requests', exc_info=self.log_engine_exc_info) except SearxEngineAccessDeniedException as e: self.handle_exception(result_container, e, suspend=True) - self.logger.exception('SearXNG is blocked', exc_info=self.engine_exc_info) + self.logger.exception('SearXNG is blocked', exc_info=self.log_engine_exc_info) except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) - self.logger.exception('exception : {0}'.format(e), exc_info=self.engine_exc_info) + self.logger.exception('exception : {0}'.format(e), exc_info=self.log_engine_exc_info) def get_default_tests(self): tests = {}