Compare commits

...

3 Commits

Author SHA1 Message Date
Bnyro 0973581b89
Merge 443fcc7233 into 10d3af84b8 2024-11-19 17:16:42 +01:00
Markus Heiser 10d3af84b8 [fix] engine: duckduckgo - don't quote query string
The query string send to DDG must not be qouted.

The query string was URL-qouted in #4011, but the URL-qouted query string result
in unexpected *URL decoded* and other garbish results as reported in #4019
and #4020.  To test compare the results of a query like::

    !ddg Häuser und Straßen :de
    !ddg Häuser und Straßen :all
    !ddg 房屋和街道 :all
    !ddg 房屋和街道 :zh

Closed:

- [#4019] https://github.com/searxng/searxng/issues/4019
- [#4020] https://github.com/searxng/searxng/issues/4020

Related:

- [#4011] https://github.com/searxng/searxng/pull/4011

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-17 18:14:22 +01:00
Bnyro 443fcc7233 [feat] metrics: support for open metrics 2024-10-15 11:34:17 +02:00
6 changed files with 129 additions and 16 deletions

View File

@ -6,7 +6,7 @@ DuckDuckGo Lite
from typing import TYPE_CHECKING
import re
from urllib.parse import urlencode, quote_plus
from urllib.parse import urlencode
import json
import babel
import lxml.html
@ -263,7 +263,7 @@ def request(query, params):
params['url'] = url
params['method'] = 'POST'
params['data']['q'] = quote_plus(query)
params['data']['q'] = query
# The API is not documented, so we do some reverse engineering and emulate
# what https://html.duckduckgo.com/html does when you press "next Page" link
@ -381,7 +381,11 @@ def response(resp):
zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()
if zero_click and "Your IP address is" not in zero_click and "Your user agent:" not in zero_click:
if zero_click and (
"Your IP address is" not in zero_click
and "Your user agent:" not in zero_click
and "URL Decoded:" not in zero_click
):
current_query = resp.search_params["data"].get("q")
results.append(

View File

@ -8,6 +8,7 @@ from timeit import default_timer
from operator import itemgetter
from searx.engines import engines
from searx.openmetrics import OpenMetricsFamily
from .models import HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage
from .error_recorder import count_error, count_exception, errors_per_engines
@ -149,7 +150,9 @@ def get_reliabilities(engline_name_list, checker_results):
checker_result = checker_results.get(engine_name, {})
checker_success = checker_result.get('success', True)
errors = engine_errors.get(engine_name) or []
if counter('engine', engine_name, 'search', 'count', 'sent') == 0:
sent_count = counter('engine', engine_name, 'search', 'count', 'sent')
if sent_count == 0:
# no request
reliability = None
elif checker_success and not errors:
@ -164,8 +167,9 @@ def get_reliabilities(engline_name_list, checker_results):
reliabilities[engine_name] = {
'reliability': reliability,
'sent_count': sent_count,
'errors': errors,
'checker': checker_results.get(engine_name, {}).get('errors', {}),
'checker': checker_result.get('errors', {}),
}
return reliabilities
@ -245,3 +249,53 @@ def get_engines_stats(engine_name_list):
'max_time': math.ceil(max_time_total or 0),
'max_result_count': math.ceil(max_result_count or 0),
}
def openmetrics(engine_stats, engine_reliabilities):
metrics = [
OpenMetricsFamily(
key="searxng_engines_response_time_total_seconds",
type_hint="gauge",
help_hint="The average total response time of the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine['total'] for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_response_time_processing_seconds",
type_hint="gauge",
help_hint="The average processing response time of the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine['processing'] for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_response_time_http_seconds",
type_hint="gauge",
help_hint="The average HTTP response time of the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine['http'] for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_result_count_total",
type_hint="counter",
help_hint="The total amount of results returned by the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine['result_count'] for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_request_count_total",
type_hint="counter",
help_hint="The total amount of user requests made to this engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[engine_reliabilities.get(engine['name'], {}).get('sent_count', 0) for engine in engine_stats['time']],
),
OpenMetricsFamily(
key="searxng_engines_reliability_total",
type_hint="counter",
help_hint="The overall reliability of the engine",
data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
data=[
engine_reliabilities.get(engine['name'], {}).get('reliability', 0) for engine in engine_stats['time']
],
),
]
return "".join([str(metric) for metric in metrics])

35
searx/openmetrics.py Normal file
View File

@ -0,0 +1,35 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Module providing support for displaying data in OpenMetrics format"""
class OpenMetricsFamily: # pylint: disable=too-few-public-methods
"""A family of metrics.
The key parameter is the metric name that should be used (snake case).
The type_hint parameter must be one of 'counter', 'gauge', 'histogram', 'summary'.
The help_hint parameter is a short string explaining the metric.
The data_info parameter is a dictionary of descriptionary parameters for the data point (e.g. request method/path).
The data parameter is a flat list of the actual data in shape of a primive type.
See https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md for more information.
"""
def __init__(self, key: str, type_hint: str, help_hint: str, data_info: list, data: list):
self.key = key
self.type_hint = type_hint
self.help_hint = help_hint
self.data_info = data_info
self.data = data
def __str__(self):
text_representation = f"""# HELP {self.key} {self.help_hint}
# TYPE {self.key} {self.type_hint}
"""
for i in range(0, len(self.data_info)):
if not self.data[i] and self.data[i] != 0:
continue
info_representation = ','.join([f"{key}=\"{value}\"" for (key, value) in self.data_info[i].items()])
text_representation += f"{self.key}{{{info_representation}}} {self.data[i]}\n"
return text_representation

View File

@ -12,6 +12,10 @@ general:
contact_url: false
# record stats
enable_metrics: true
# expose stats in open metrics format at /metrics
# leave empty to disable (no password set)
# open_metrics: <password>
open_metrics: ''
brand:
new_issue_url: https://github.com/searxng/searxng/issues/new

View File

@ -143,6 +143,7 @@ SCHEMA = {
'contact_url': SettingsValue((None, False, str), None),
'donation_url': SettingsValue((bool, str), "https://docs.searxng.org/donate.html"),
'enable_metrics': SettingsValue(bool, True),
'open_metrics': SettingsValue(str, ''),
},
'brand': {
'issue_url': SettingsValue(str, 'https://github.com/searxng/searxng/issues'),

View File

@ -87,10 +87,7 @@ from searx.webadapter import (
get_selected_categories,
parse_lang,
)
from searx.utils import (
gen_useragent,
dict_subset,
)
from searx.utils import gen_useragent, dict_subset
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
from searx.query import RawTextQuery
from searx.plugins import Plugin, plugins, initialize as plugin_initialize
@ -104,13 +101,7 @@ from searx.answerers import (
answerers,
ask,
)
from searx.metrics import (
get_engines_stats,
get_engine_errors,
get_reliabilities,
histogram,
counter,
)
from searx.metrics import get_engines_stats, get_engine_errors, get_reliabilities, histogram, counter, openmetrics
from searx.flaskfix import patch_application
from searx.locales import (
@ -1218,6 +1209,30 @@ def stats_checker():
return jsonify(result)
@app.route('/metrics')
def stats_open_metrics():
password = settings['general'].get("open_metrics")
if not (settings['general'].get("enable_metrics") and password):
return Response('open metrics is disabled', status=404, mimetype='text/plain')
if not request.authorization or request.authorization.password != password:
return Response('access forbidden', status=401, mimetype='text/plain')
filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
checker_results = checker_get_result()
checker_results = (
checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
)
engine_stats = get_engines_stats(filtered_engines)
engine_reliabilities = get_reliabilities(filtered_engines, checker_results)
metrics_text = openmetrics(engine_stats, engine_reliabilities)
return Response(metrics_text, mimetype='text/plain')
@app.route('/robots.txt', methods=['GET'])
def robots():
return Response(