Compare commits

...

3 Commits

Author SHA1 Message Date
Bnyro 0973581b89
Merge 443fcc7233 into 10d3af84b8 2024-11-19 17:16:42 +01:00
Markus Heiser 10d3af84b8 [fix] engine: duckduckgo - don't quote query string
The query string send to DDG must not be qouted.

The query string was URL-qouted in #4011, but the URL-qouted query string result
in unexpected *URL decoded* and other garbish results as reported in #4019
and #4020.  To test compare the results of a query like::

    !ddg Häuser und Straßen :de
    !ddg Häuser und Straßen :all
    !ddg 房屋和街道 :all
    !ddg 房屋和街道 :zh

Closed:

- [#4019] https://github.com/searxng/searxng/issues/4019
- [#4020] https://github.com/searxng/searxng/issues/4020

Related:

- [#4011] https://github.com/searxng/searxng/pull/4011

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-17 18:14:22 +01:00
Bnyro 443fcc7233 [feat] metrics: support for open metrics 2024-10-15 11:34:17 +02:00
6 changed files with 129 additions and 16 deletions

View File

@ -6,7 +6,7 @@ DuckDuckGo Lite
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import re import re
from urllib.parse import urlencode, quote_plus from urllib.parse import urlencode
import json import json
import babel import babel
import lxml.html import lxml.html
@ -263,7 +263,7 @@ def request(query, params):
params['url'] = url params['url'] = url
params['method'] = 'POST' params['method'] = 'POST'
params['data']['q'] = quote_plus(query) params['data']['q'] = query
# The API is not documented, so we do some reverse engineering and emulate # The API is not documented, so we do some reverse engineering and emulate
# what https://html.duckduckgo.com/html does when you press "next Page" link # what https://html.duckduckgo.com/html does when you press "next Page" link
@ -381,7 +381,11 @@ def response(resp):
zero_click_info_xpath = '//div[@id="zero_click_abstract"]' zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip() zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()
if zero_click and "Your IP address is" not in zero_click and "Your user agent:" not in zero_click: if zero_click and (
"Your IP address is" not in zero_click
and "Your user agent:" not in zero_click
and "URL Decoded:" not in zero_click
):
current_query = resp.search_params["data"].get("q") current_query = resp.search_params["data"].get("q")
results.append( results.append(

View File

@ -8,6 +8,7 @@ from timeit import default_timer
from operator import itemgetter from operator import itemgetter
from searx.engines import engines from searx.engines import engines
from searx.openmetrics import OpenMetricsFamily
from .models import HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage from .models import HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage
from .error_recorder import count_error, count_exception, errors_per_engines from .error_recorder import count_error, count_exception, errors_per_engines
@ -149,7 +150,9 @@ def get_reliabilities(engline_name_list, checker_results):
checker_result = checker_results.get(engine_name, {}) checker_result = checker_results.get(engine_name, {})
checker_success = checker_result.get('success', True) checker_success = checker_result.get('success', True)
errors = engine_errors.get(engine_name) or [] errors = engine_errors.get(engine_name) or []
if counter('engine', engine_name, 'search', 'count', 'sent') == 0: sent_count = counter('engine', engine_name, 'search', 'count', 'sent')
if sent_count == 0:
# no request # no request
reliability = None reliability = None
elif checker_success and not errors: elif checker_success and not errors:
@ -164,8 +167,9 @@ def get_reliabilities(engline_name_list, checker_results):
reliabilities[engine_name] = { reliabilities[engine_name] = {
'reliability': reliability, 'reliability': reliability,
'sent_count': sent_count,
'errors': errors, 'errors': errors,
'checker': checker_results.get(engine_name, {}).get('errors', {}), 'checker': checker_result.get('errors', {}),
} }
return reliabilities return reliabilities
@ -245,3 +249,53 @@ def get_engines_stats(engine_name_list):
'max_time': math.ceil(max_time_total or 0), 'max_time': math.ceil(max_time_total or 0),
'max_result_count': math.ceil(max_result_count or 0), 'max_result_count': math.ceil(max_result_count or 0),
} }
def openmetrics(engine_stats, engine_reliabilities):
metrics = [
OpenMetricsFamily(
key="searxng_engines_response_time_total_seconds",
type_hint="gauge",
help_hint="The average total response time of the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine['total'] for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_response_time_processing_seconds",
type_hint="gauge",
help_hint="The average processing response time of the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine['processing'] for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_response_time_http_seconds",
type_hint="gauge",
help_hint="The average HTTP response time of the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine['http'] for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_result_count_total",
type_hint="counter",
help_hint="The total amount of results returned by the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine['result_count'] for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_request_count_total",
type_hint="counter",
help_hint="The total amount of user requests made to this engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine_reliabilities.get(engine['name'], {}).get('sent_count', 0) for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_reliability_total",
type_hint="counter",
help_hint="The overall reliability of the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[
engine_reliabilities.get(engine['name'], {}).get('reliability', 0) for engine in engine_stats['time']
],
),
]
return "".join([str(metric) for metric in metrics])

35
searx/openmetrics.py Normal file
View File

@ -0,0 +1,35 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Module providing support for displaying data in OpenMetrics format"""
class OpenMetricsFamily: # pylint: disable=too-few-public-methods
"""A family of metrics.
The key parameter is the metric name that should be used (snake case).
The type_hint parameter must be one of 'counter', 'gauge', 'histogram', 'summary'.
The help_hint parameter is a short string explaining the metric.
The data_info parameter is a dictionary of descriptionary parameters for the data point (e.g. request method/path).
The data parameter is a flat list of the actual data in shape of a primive type.
See https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md for more information.
"""
def __init__(self, key: str, type_hint: str, help_hint: str, data_info: list, data: list):
self.key = key
self.type_hint = type_hint
self.help_hint = help_hint
self.data_info = data_info
self.data = data
def __str__(self):
text_representation = f"""# HELP {self.key} {self.help_hint}
# TYPE {self.key} {self.type_hint}
"""
for i in range(0, len(self.data_info)):
if not self.data[i] and self.data[i] != 0:
continue
info_representation = ','.join([f"{key}=\"{value}\"" for (key, value) in self.data_info[i].items()])
text_representation += f"{self.key}{{{info_representation}}} {self.data[i]}\n"
return text_representation

View File

@ -12,6 +12,10 @@ general:
contact_url: false contact_url: false
# record stats # record stats
enable_metrics: true enable_metrics: true
# expose stats in open metrics format at /metrics
# leave empty to disable (no password set)
# open_metrics: <password>
open_metrics: ''
brand: brand:
new_issue_url: https://github.com/searxng/searxng/issues/new new_issue_url: https://github.com/searxng/searxng/issues/new

View File

@ -143,6 +143,7 @@ SCHEMA = {
'contact_url': SettingsValue((None, False, str), None), 'contact_url': SettingsValue((None, False, str), None),
'donation_url': SettingsValue((bool, str), "https://docs.searxng.org/donate.html"), 'donation_url': SettingsValue((bool, str), "https://docs.searxng.org/donate.html"),
'enable_metrics': SettingsValue(bool, True), 'enable_metrics': SettingsValue(bool, True),
'open_metrics': SettingsValue(str, ''),
}, },
'brand': { 'brand': {
'issue_url': SettingsValue(str, 'https://github.com/searxng/searxng/issues'), 'issue_url': SettingsValue(str, 'https://github.com/searxng/searxng/issues'),

View File

@ -87,10 +87,7 @@ from searx.webadapter import (
get_selected_categories, get_selected_categories,
parse_lang, parse_lang,
) )
from searx.utils import ( from searx.utils import gen_useragent, dict_subset
gen_useragent,
dict_subset,
)
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
from searx.query import RawTextQuery from searx.query import RawTextQuery
from searx.plugins import Plugin, plugins, initialize as plugin_initialize from searx.plugins import Plugin, plugins, initialize as plugin_initialize
@ -104,13 +101,7 @@ from searx.answerers import (
answerers, answerers,
ask, ask,
) )
from searx.metrics import ( from searx.metrics import get_engines_stats, get_engine_errors, get_reliabilities, histogram, counter, openmetrics
get_engines_stats,
get_engine_errors,
get_reliabilities,
histogram,
counter,
)
from searx.flaskfix import patch_application from searx.flaskfix import patch_application
from searx.locales import ( from searx.locales import (
@ -1218,6 +1209,30 @@ def stats_checker():
return jsonify(result) return jsonify(result)
@app.route('/metrics')
def stats_open_metrics():
password = settings['general'].get("open_metrics")
if not (settings['general'].get("enable_metrics") and password):
return Response('open metrics is disabled', status=404, mimetype='text/plain')
if not request.authorization or request.authorization.password != password:
return Response('access forbidden', status=401, mimetype='text/plain')
filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
checker_results = checker_get_result()
checker_results = (
checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
)
engine_stats = get_engines_stats(filtered_engines)
engine_reliabilities = get_reliabilities(filtered_engines, checker_results)
metrics_text = openmetrics(engine_stats, engine_reliabilities)
return Response(metrics_text, mimetype='text/plain')
@app.route('/robots.txt', methods=['GET']) @app.route('/robots.txt', methods=['GET'])
def robots(): def robots():
return Response( return Response(