mirror of https://github.com/searxng/searxng.git
Compare commits
8 Commits
ed9c5f48e7 ... ab27d6b45b
Author | SHA1 | Date
---|---|---
Grant Lanham Jr | ab27d6b45b |
Markus Heiser | 0f9694c90b |
Markus Heiser | ccc4f30b20 |
Markus Heiser | c4b874e9b0 |
Markus Heiser | 7c4e4ebd40 |
Grant Lanham | 27aa9c4cb1 |
Grant Lanham | 118a748fba |
Grant Lanham | 9f7244d6f1 |
@@ -4,22 +4,27 @@ Welcome to SearXNG
 
 *Search without being tracked.*
 
-SearXNG is a free internet metasearch engine which aggregates results from more
-than 70 search services. Users are neither tracked nor profiled. Additionally,
-SearXNG can be used over Tor for online anonymity.
+.. jinja:: searx
+
+   SearXNG is a free internet metasearch engine which aggregates results from up
+   to {{engines | length}} :ref:`search services <configured engines>`. Users
+   are neither tracked nor profiled. Additionally, SearXNG can be used over Tor
+   for online anonymity.
 
 Get started with SearXNG by using one of the instances listed at searx.space_.
 If you don't trust anyone, you can set up your own, see :ref:`installation`.
 
-.. sidebar:: features
+.. jinja:: searx
+
+   .. sidebar:: features
 
 - :ref:`self hosted <installation>`
 - :ref:`no user tracking / no profiling <SearXNG protect privacy>`
 - script & cookies are optional
 - secure, encrypted connections
-- :ref:`about 200 search engines <configured engines>`
-- `about 60 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
-- about 100 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
+- :ref:`{{engines | length}} search engines <configured engines>`
+- `58 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
+- about 70 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
 - :ref:`easy integration of search engines <demo online engine>`
 - professional development: `CI <https://github.com/searxng/searxng/actions>`_,
   `quality assurance <https://dev.searxng.org/>`_ &
@@ -1,71 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Internet Archive scholar(science)
"""

from datetime import datetime
from urllib.parse import urlencode
from searx.utils import html_to_text

about = {
    "website": "https://scholar.archive.org/",
    "wikidata_id": "Q115667709",
    "official_api_documentation": "https://scholar.archive.org/api/redoc",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
categories = ['science', 'scientific publications']
paging = True

base_url = "https://scholar.archive.org"
results_per_page = 15


def request(query, params):
    args = {
        "q": query,
        "limit": results_per_page,
        "offset": (params["pageno"] - 1) * results_per_page,
    }
    params["url"] = f"{base_url}/search?{urlencode(args)}"
    params["headers"]["Accept"] = "application/json"
    return params


def response(resp):
    results = []

    json = resp.json()

    for result in json["results"]:
        publishedDate, content, doi = None, '', None

        if result['biblio'].get('release_date'):
            publishedDate = datetime.strptime(result['biblio']['release_date'], "%Y-%m-%d")

        if len(result['abstracts']) > 0:
            content = result['abstracts'][0].get('body')
        elif len(result['_highlights']) > 0:
            content = result['_highlights'][0]

        if len(result['releases']) > 0:
            doi = result['releases'][0].get('doi')

        results.append(
            {
                'template': 'paper.html',
                'url': result['fulltext']['access_url'],
                'title': result['biblio'].get('title') or result['biblio'].get('container_name'),
                'content': html_to_text(content),
                'publisher': result['biblio'].get('publisher'),
                'doi': doi,
                'journal': result['biblio'].get('container_name'),
                'authors': result['biblio'].get('contrib_names'),
                'tags': result['tags'],
                'publishedDate': publishedDate,
                'issns': result['biblio'].get('issns'),
                'pdf_url': result['fulltext'].get('access_url'),
            }
        )

    return results
@@ -27,7 +27,7 @@ categories = ['images']
 paging = True
 
 endpoint = 'photos'
-base_url = 'https://loc.gov'
+base_url = 'https://www.loc.gov'
 search_string = "/{endpoint}/?sp={page}&{query}&fo=json"
@@ -3,6 +3,7 @@
 
 import typing
 import inspect
+import logging
 from json import JSONDecodeError
 from urllib.parse import urlparse
 from httpx import HTTPError, HTTPStatusError

@@ -30,10 +31,20 @@ class ErrorContext:  # pylint: disable=missing-class-docstring
         'log_message',
         'log_parameters',
         'secondary',
+        'log_level',
     )
 
     def __init__(  # pylint: disable=too-many-arguments
-        self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary
+        self,
+        filename,
+        function,
+        line_no,
+        code,
+        exception_classname,
+        log_message,
+        log_parameters,
+        secondary,
+        log_level=logging.WARN,
     ):
         self.filename = filename
         self.function = function

@@ -43,6 +54,7 @@ class ErrorContext:  # pylint: disable=missing-class-docstring
         self.log_message = log_message
         self.log_parameters = log_parameters
         self.secondary = secondary
+        self.log_level: int = log_level
 
     def __eq__(self, o) -> bool:  # pylint: disable=invalid-name
         if not isinstance(o, ErrorContext):

@@ -56,6 +68,7 @@ class ErrorContext:  # pylint: disable=missing-class-docstring
             and self.log_message == o.log_message
             and self.log_parameters == o.log_parameters
             and self.secondary == o.secondary
+            and self.log_level == o.log_level
         )
 
     def __hash__(self):

@@ -69,11 +82,12 @@ class ErrorContext:  # pylint: disable=missing-class-docstring
                 self.log_message,
                 self.log_parameters,
                 self.secondary,
+                self.log_level,
             )
         )
 
     def __repr__(self):
-        return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".format(
+        return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}), {!r}, {!r}".format(
             self.filename,
             self.line_no,
             self.code,

@@ -81,13 +95,14 @@ class ErrorContext:  # pylint: disable=missing-class-docstring
             self.log_message,
             self.log_parameters,
             self.secondary,
+            self.log_level,
         )
 
 
 def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
     errors_for_engine = errors_per_engines.setdefault(engine_name, {})
     errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
-    engines[engine_name].logger.warning('%s', str(error_context))
+    engines[engine_name].logger.log(error_context.log_level, '%s', str(error_context))
 
 
 def get_trace(traces):

@@ -157,7 +172,9 @@ def get_exception_classname(exc: Exception) -> str:
     return exc_module + '.' + exc_name
 
 
-def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext:
+def get_error_context(
+    framerecords, exception_classname, log_message, log_parameters, secondary, log_level: int
+) -> ErrorContext:
     searx_frame = get_trace(framerecords)
     filename = searx_frame.filename
     if filename.startswith(searx_parent_dir):

@@ -166,30 +183,36 @@ def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext:
     line_no = searx_frame.lineno
     code = searx_frame.code_context[0].strip()
     del framerecords
-    return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary)
+    return ErrorContext(
+        filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary, log_level
+    )
 
 
-def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None:
+def count_exception(engine_name: str, exc: Exception, secondary: bool = False, log_level=logging.WARN) -> None:
     if not settings['general']['enable_metrics']:
         return
     framerecords = inspect.trace()
     try:
         exception_classname = get_exception_classname(exc)
         log_parameters = get_messages(exc, framerecords[-1][1])
-        error_context = get_error_context(framerecords, exception_classname, None, log_parameters, secondary)
+        error_context = get_error_context(framerecords, exception_classname, None, log_parameters, secondary, log_level)
         add_error_context(engine_name, error_context)
     finally:
         del framerecords
 
 
 def count_error(
-    engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False
+    engine_name: str,
+    log_message: str,
+    log_parameters: typing.Optional[typing.Tuple] = None,
+    secondary: bool = False,
+    log_level: int = logging.WARN,
 ) -> None:
     if not settings['general']['enable_metrics']:
         return
     framerecords = list(reversed(inspect.stack()[1:]))
     try:
-        error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary)
+        error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary, log_level)
         add_error_context(engine_name, error_context)
     finally:
         del framerecords
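For orientation, a minimal call-site sketch of the extended API (illustrative, not part of the diff): it assumes a running SearXNG where the named engine is configured and `general.enable_metrics` is true, and that `count_error` / `count_exception` are imported from `searx.metrics` as the engine processors do; the engine name and messages are example values.

```python
import logging

from searx.metrics import count_error, count_exception

# Default behaviour: the recorded error is logged at WARNING, as before.
count_error('duckduckgo', 'unexpected HTML layout', log_parameters=('div.results',))

# New behaviour: a caller (e.g. the checker) can lower the level so the record
# still shows up in the metrics but stays out of the warning log.
try:
    raise TimeoutError('engine timed out')
except TimeoutError as exc:
    count_exception('duckduckgo', exc, secondary=True, log_level=logging.DEBUG)
```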
@@ -233,8 +233,7 @@ class Network:
         del kwargs['raise_for_httperror']
         return do_raise_for_httperror
 
-    @staticmethod
-    def patch_response(response, do_raise_for_httperror):
+    def patch_response(self, response, do_raise_for_httperror):
         if isinstance(response, httpx.Response):
             # requests compatibility (response is not streamed)
             # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses

@@ -242,8 +241,11 @@ class Network:
 
             # raise an exception
             if do_raise_for_httperror:
-                raise_for_httperror(response)
-
+                try:
+                    raise_for_httperror(response)
+                except:
+                    self._logger.warning(f"HTTP Request failed: {response.request.method} {response.request.url}")
+                    raise
         return response
 
     def is_valid_response(self, response):

@@ -269,7 +271,7 @@ class Network:
             else:
                 response = await client.request(method, url, **kwargs)
             if self.is_valid_response(response) or retries <= 0:
-                return Network.patch_response(response, do_raise_for_httperror)
+                return self.patch_response(response, do_raise_for_httperror)
         except httpx.RemoteProtocolError as e:
             if not was_disconnected:
                 # the server has closed the connection:
@@ -357,6 +357,7 @@ class Checker:  # pylint: disable=missing-class-docstring
 
     def __init__(self, processor: EngineProcessor):
         self.processor = processor
+        self.processor.log_engine_exc_info = False  # Remove exception information from errors to reduce verbosity
         self.tests = self.processor.get_tests()
         self.test_results = TestResults()
 

@@ -418,8 +419,10 @@ class Checker:  # pylint: disable=missing-class-docstring
         result_container_check.check_basic()
         return result_container_check
 
-    def run_test(self, test_name):
+    def run_test(self, test_name: str):
         test_parameters = self.tests[test_name]
+        # Not really a warning, but an info log will not appear
+        logger.warning('---%s---', test_name)
         search_query_list = list(Checker.search_query_matrix_iterator(self.engineref_list, test_parameters['matrix']))
         rct_list = [self.get_result_container_tests(test_name, search_query) for search_query in search_query_list]
         stop_test = False
@@ -3,6 +3,8 @@
 
 """
 
+import logging
+from logging import Logger
 import threading
 from abc import abstractmethod, ABC
 from timeit import default_timer

@@ -58,12 +60,13 @@ class SuspendedStatus:
 class EngineProcessor(ABC):
     """Base classes used for all types of request processors."""
 
-    __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger'
+    __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger', 'log_engine_exc_info'
 
     def __init__(self, engine, engine_name: str):
         self.engine = engine
         self.engine_name = engine_name
-        self.logger = engines[engine_name].logger
+        self.logger: Logger = engines[engine_name].logger
+        self.log_engine_exc_info = True
         key = get_network(self.engine_name)
         key = id(key) if key else self.engine_name
         self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())

@@ -82,6 +85,10 @@ class EngineProcessor(ABC):
     def has_initialize_function(self):
         return hasattr(self.engine, 'init')
 
+    @property
+    def metrics_log_level(self) -> int:
+        return logging.WARN if self.log_engine_exc_info else logging.NOTSET
+
     def handle_exception(self, result_container, exception_or_message, suspend=False):
         # update result_container
         if isinstance(exception_or_message, BaseException):

@@ -95,9 +102,9 @@ class EngineProcessor(ABC):
         # metrics
         counter_inc('engine', self.engine_name, 'search', 'count', 'error')
         if isinstance(exception_or_message, BaseException):
-            count_exception(self.engine_name, exception_or_message)
+            count_exception(self.engine_name, exception_or_message, log_level=self.metrics_log_level)
         else:
-            count_error(self.engine_name, exception_or_message)
+            count_error(self.engine_name, exception_or_message, log_level=self.metrics_log_level)
         # suspend the engine ?
         if suspend:
             suspended_time = None
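A side note on why `logging.NOTSET` works as an "off" switch in the new `metrics_log_level` property: `Logger.log()` only emits records whose level clears the logger's effective level, and `NOTSET` is 0, so such records are always dropped. A self-contained illustration of that behaviour (plain standard library, not SearXNG code; the logger name is illustrative only):

```python
import logging

logging.basicConfig(level=logging.DEBUG)  # accept everything down to DEBUG
logger = logging.getLogger("searx.engines.example")  # illustrative name

# What add_error_context() effectively does with the two possible levels:
logger.log(logging.WARN, "ErrorContext(...)")    # emitted as a WARNING record
logger.log(logging.NOTSET, "ErrorContext(...)")  # level 0 never clears the threshold -> silently dropped
```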
@@ -127,6 +127,7 @@ class OnlineProcessor(EngineProcessor):
                 '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
                 (status_code, reason, hostname),
                 secondary=True,
+                log_level=self.metrics_log_level,
             )
 
         return response

@@ -137,9 +138,6 @@ class OnlineProcessor(EngineProcessor):
         self.engine.request(query, params)
 
         # ignoring empty urls
-        if params['url'] is None:
-            return None
-
         if not params['url']:
             return None
 

@@ -180,20 +178,21 @@ class OnlineProcessor(EngineProcessor):
             self.logger.exception(
                 "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format(
                     default_timer() - start_time, timeout_limit, e
-                )
+                ),
+                exc_info=self.log_engine_exc_info,
             )
         except SearxEngineCaptchaException as e:
             self.handle_exception(result_container, e, suspend=True)
-            self.logger.exception('CAPTCHA')
+            self.logger.exception('CAPTCHA', exc_info=self.log_engine_exc_info)
         except SearxEngineTooManyRequestsException as e:
             self.handle_exception(result_container, e, suspend=True)
-            self.logger.exception('Too many requests')
+            self.logger.exception('Too many requests', exc_info=self.log_engine_exc_info)
         except SearxEngineAccessDeniedException as e:
             self.handle_exception(result_container, e, suspend=True)
-            self.logger.exception('SearXNG is blocked')
+            self.logger.exception('SearXNG is blocked', exc_info=self.log_engine_exc_info)
         except Exception as e:  # pylint: disable=broad-except
             self.handle_exception(result_container, e)
-            self.logger.exception('exception : {0}'.format(e))
+            self.logger.exception('exception : {0}'.format(e), exc_info=self.log_engine_exc_info)
 
     def get_default_tests(self):
         tests = {}
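The `exc_info=` keyword used above is plain `logging` behaviour: `logger.exception()` attaches the current traceback by default, and passing `exc_info=False` keeps only the message. A small standalone illustration of the effect the checker relies on when it sets `log_engine_exc_info = False` (assumed names, not SearXNG code):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("demo.engine")

def simulate_engine_error(verbose: bool) -> None:
    try:
        raise TimeoutError("engine timed out")
    except TimeoutError:
        # With verbose=True the full traceback is appended to the record,
        # with verbose=False only the single error line is printed.
        logger.exception("requests exception", exc_info=verbose)

simulate_engine_error(verbose=True)   # message + traceback
simulate_engine_error(verbose=False)  # message only
```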
@@ -1622,11 +1622,6 @@ engines:
     api_site: 'askubuntu'
     categories: [it, q&a]
 
-  - name: internetarchivescholar
-    engine: internet_archive_scholar
-    shortcut: ias
-    timeout: 15.0
-
   - name: superuser
     engine: stackexchange
     shortcut: su